From: Michael Anthony Knyszek Date: Sat, 2 Aug 2025 03:26:43 +0000 (+0000) Subject: runtime: disable stack shrinking for all waiting-for-suspendG cases X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=bd07fafb0a2f979b2be05d9a533182ca55428079;p=gostls13.git runtime: disable stack shrinking for all waiting-for-suspendG cases Currently isShrinkStackSafe returns false if a goroutine is put into _Gwaiting while it actually goes and executes on the system stack. For a long time, we needed to be robust to the goroutine's stack shrinking while we're executing on the system stack. Unfortunately, this has become harder and harder to do over time. First, the execution tracer might be invoked in these contexts and it may wish to take a stack trace. We cannot take the stack trace if the garbage collector might concurrently shrink the stack of the user goroutine we want to trace. So, isShrinkStackSafe grew the condition that we wouldn't try to shrink the stack in these cases if execution tracing was enabled. Today, runtime.mutex may wish to take a stack trace for the mutex profile, and it can happen in a very similar context. Taking the stack trace is no longer safe. This change takes the stance that we stop trying to make this work at all, and instead guarantee that the stack won't move while we're in these sensitive contexts. Change-Id: Ibfad2d7a335ee97cecaa48001df0db9812deeab1 Reviewed-on: https://go-review.googlesource.com/c/go/+/692716 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt Auto-Submit: Michael Knyszek --- diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f2df1a00e0..26cec37f74 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1521,18 +1521,15 @@ func gcBgMarkWorker(ready chan struct{}) { } systemstack(func() { - // Mark our goroutine preemptible so its stack - // can be scanned or observed by the execution - // tracer. This, for example, lets two mark workers - // scan each other (otherwise, they would - // deadlock). We must not modify anything on - // the G stack. However, stack shrinking is - // disabled for mark workers, so it is safe to - // read from the G stack. + // Mark our goroutine preemptible so its stack can be scanned or observed + // by the execution tracer. This, for example, lets two mark workers scan + // each other (otherwise, they would deadlock). // - // N.B. The execution tracer is not aware of this status - // transition and handles it specially based on the - // wait reason. + // casGToWaitingForSuspendG marks the goroutine as ineligible for a + // stack shrink, effectively pinning the stack in memory for the duration. + // + // N.B. The execution tracer is not aware of this status transition and + // handles it specially based on the wait reason. casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCWorkerActive) switch pp.gcMarkWorkerMode { default: diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 25d39d9ba3..8d5f2fc793 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1372,6 +1372,9 @@ func casGToWaiting(gp *g, old uint32, reason waitReason) { // casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason. // The wait reason must be a valid isWaitingForSuspendG wait reason. // +// While a goroutine is in this state, it's stack is effectively pinned. +// The garbage collector must not shrink or otherwise mutate the goroutine's stack. +// // Use this over casgstatus when possible to ensure that a waitreason is set. func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) { if !reason.isWaitingForSuspendG() { @@ -1608,18 +1611,11 @@ func stopTheWorldWithSema(reason stwReason) worldStop { // stack while we try to stop the world since otherwise we could get // in a mutual preemption deadlock. // - // We must not modify anything on the G stack because a stack shrink - // may occur, now that we switched to _Gwaiting, specifically if we're - // doing this during the mark phase (mark termination excepted, since - // we know that stack scanning is done by that point). A stack shrink - // is otherwise OK though because in order to return from this function - // (and to leave the system stack) we must have preempted all - // goroutines, including any attempting to scan our stack, in which - // case, any stack shrinking will have already completed by the time we - // exit. + // casGToWaitingForSuspendG marks the goroutine as ineligible for a + // stack shrink, effectively pinning the stack in memory for the duration. // // N.B. The execution tracer is not aware of this status transition and - // andles it specially based on the wait reason. + // handles it specially based on the wait reason. casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld) trace := traceAcquire() @@ -2106,16 +2102,11 @@ func forEachP(reason waitReason, fn func(*p)) { // deadlock as we attempt to preempt a goroutine that's trying // to preempt us (e.g. for a stack scan). // - // We must not modify anything on the G stack because a stack shrink - // may occur. A stack shrink is otherwise OK though because in order - // to return from this function (and to leave the system stack) we - // must have preempted all goroutines, including any attempting - // to scan our stack, in which case, any stack shrinking will - // have already completed by the time we exit. + // casGToWaitingForSuspendG marks the goroutine as ineligible for a + // stack shrink, effectively pinning the stack in memory for the duration. // - // N.B. The execution tracer is not aware of this status - // transition and handles it specially based on the - // wait reason. + // N.B. The execution tracer is not aware of this status transition and + // handles it specially based on the wait reason. casGToWaitingForSuspendG(gp, _Grunning, reason) forEachPInternal(fn) casgstatus(gp, _Gwaiting, _Grunning) diff --git a/src/runtime/stack.go b/src/runtime/stack.go index a338708d76..d809820b07 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1214,15 +1214,18 @@ func isShrinkStackSafe(gp *g) bool { if gp.parkingOnChan.Load() { return false } - // We also can't copy the stack while tracing is enabled, and - // gp is in _Gwaiting solely to make itself available to suspendG. + // We also can't copy the stack while a gp is in _Gwaiting solely + // to make itself available to suspendG. + // // In these cases, the G is actually executing on the system - // stack, and the execution tracer may want to take a stack trace - // of the G's stack. Note: it's safe to access gp.waitreason here. - // We're only checking if this is true if we took ownership of the + // stack, and the execution tracer, mutex profiler, etc. may want + // to take a stack trace of the G's stack. + // + // Note: it's safe to access gp.waitreason here. + // We're only calling isShrinkStackSafe if we took ownership of the // G with the _Gscan bit. This prevents the goroutine from transitioning, // which prevents gp.waitreason from changing. - if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() { + if readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() { return false } return true @@ -1258,12 +1261,6 @@ func shrinkstack(gp *g) { if debug.gcshrinkstackoff > 0 { return } - f := findfunc(gp.startpc) - if f.valid() && f.funcID == abi.FuncID_gcBgMarkWorker { - // We're not allowed to shrink the gcBgMarkWorker - // stack (see gcBgMarkWorker for explanation). - return - } oldsize := gp.stack.hi - gp.stack.lo newsize := oldsize / 2