From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Tue, 10 Jun 2025 16:42:59 +0000 (+0000)
Subject: [release-branch.go1.24] runtime: handle system goroutines later in goroutine profiling
X-Git-Tag: go1.24.5~5
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=21b488bb60a48cc6b284b511855fddc3f1b1f2a8;p=gostls13.git

[release-branch.go1.24] runtime: handle system goroutines later in goroutine profiling

In Go 1.24 and earlier, it's possible for a just-starting finalizer
goroutine to have its stack traced in goroutine profiles even though
it shouldn't, because it wasn't visible to the goroutine profile STW.
This can also bump out other stacks, because the goroutine profiler
wasn't expecting to have another stack. Fix this by letting all
system goroutines participate in the goroutine profiler's state
machine, like in the CL this is cherry-picking. This ensures that the
finalizer goroutine will be counted as a system goroutine in this
just-starting state, but still composes with the old way of doing
things, because the finalizer goroutine is advanced to the terminal
state during the STW. In Go 1.25, this is fixing a slightly different
issue, but the root of the problem is the same: all goroutines should
participate in the profiler's state machine, and they do not.

For #74090.
Fixes #74363.

Change-Id: Icb9a164a033be22aaa942d19e828e895f700ca74
Reviewed-on: https://go-review.googlesource.com/c/go/+/680477
Reviewed-by: Carlos Amedee <carlos@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
(cherry picked from commit 281cfcfc1b15fbb87fd32660b0a1d50be996d108)
Reviewed-on: https://go-review.googlesource.com/c/go/+/684017
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@google.com>
---

diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 3cf8dc815d..4b8a60718f 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -1503,11 +1503,6 @@ func tryRecordGoroutineProfile(gp1 *g, pcbuf []uintptr, yield func()) {
 		// so here we check _Gdead first.
 		return
 	}
-	if isSystemGoroutine(gp1, true) {
-		// System goroutines should not appear in the profile. (The finalizer
-		// goroutine is marked as "already profiled".)
-		return
-	}
 
 	for {
 		prev := gp1.goroutineProfiled.Load()
@@ -1545,6 +1540,17 @@ func tryRecordGoroutineProfile(gp1 *g, pcbuf []uintptr, yield func()) {
 // stack), or from the scheduler in preparation to execute gp1 (running on the
 // system stack).
 func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
+	if isSystemGoroutine(gp1, false) {
+		// System goroutines should not appear in the profile.
+		// Check this here and not in tryRecordGoroutineProfile because isSystemGoroutine
+		// may change on a goroutine while it is executing, so while the scheduler might
+		// see a system goroutine, goroutineProfileWithLabelsConcurrent might not, and
+		// this inconsistency could cause invariants to be violated, such as trying to
+		// record the stack of a running goroutine below. In short, we still want system
+		// goroutines to participate in the same state machine on gp1.goroutineProfiled as
+		// everything else, we just don't record the stack in the profile.
+		return
+	}
 	if readgstatus(gp1) == _Grunning {
 		print("doRecordGoroutineProfile gp1=", gp1.goid, "\n")
 		throw("cannot read stack of running goroutine")
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 8a1d8e2d1f..c226ef66a2 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -1808,6 +1808,45 @@ func TestGoroutineProfileCoro(t *testing.T) {
 	goroutineProf.WriteTo(io.Discard, 1)
 }
 
+// This test tries to provoke a situation wherein the finalizer goroutine is
+// erroneously inspected by the goroutine profiler in such a way that could
+// cause a crash. See go.dev/issue/74090.
+func TestGoroutineProfileIssue74090(t *testing.T) {
+	testenv.MustHaveParallelism(t)
+
+	goroutineProf := Lookup("goroutine")
+
+	// T is a pointer type so it won't be allocated by the tiny
+	// allocator, which can lead to its finalizer not being called
+	// during this test.
+	type T *byte
+	for range 10 {
+		// We use finalizers for this test because finalizers transition between
+		// system and user goroutine on each call, since there's substantially
+		// more work to do to set up a finalizer call. Cleanups, on the other hand,
+		// transition once for a whole batch, and so are less likely to trigger
+		// the failure. Under stress testing conditions this test fails approximately
+		// 5 times every 1000 executions on a 64 core machine without the appropriate
+		// fix, which is not ideal but if this test crashes at all, it's a clear
+		// signal that something is broken.
+		var objs []*T
+		for range 10000 {
+			obj := new(T)
+			runtime.SetFinalizer(obj, func(_ interface{}) {})
+			objs = append(objs, obj)
+		}
+		objs = nil
+
+		// Queue up all the finalizers.
+		runtime.GC()
+
+		// Try to run a goroutine profile concurrently with finalizer execution
+		// to trigger the bug.
+		var w strings.Builder
+		goroutineProf.WriteTo(&w, 1)
+	}
+}
+
 func BenchmarkGoroutine(b *testing.B) {
 	withIdle := func(n int, fn func(b *testing.B)) func(b *testing.B) {
 		return func(b *testing.B) {