}
cmd.Args = append(cmd.Args, testPath)
cmd.Env = append(os.Environ(), "GOEXPERIMENT=exectracer2,rangefunc")
+ // Add a stack ownership check. This is cheap enough for testing.
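+ // GODEBUG is a single comma-separated list of settings, so build up
+ // the full value first and set the variable once below.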
+ godebug := "tracecheckstackownership=1"
if stress {
- // Advance a generation constantly.
- cmd.Env = append(cmd.Env, "GODEBUG=traceadvanceperiod=0")
+ // Advance a generation constantly to stress the tracer.
+ godebug += ",traceadvanceperiod=0"
}
+ cmd.Env = append(cmd.Env, "GODEBUG="+godebug)
+
// Capture stdout and stderr.
//
// The protocol for these programs is that stdout contains the trace data
// Park the calling goroutine.
trace := traceAcquire()
- casGToWaiting(gp, _Grunning, waitReasonDebugCall)
if trace.ok() {
+ // Trace the event before the transition. It may take a
+ // stack trace, but we won't own the stack after the
+ // transition anymore.
trace.GoPark(traceBlockDebugCall, 1)
+ }
+ casGToWaiting(gp, _Grunning, waitReasonDebugCall)
+ if trace.ok() {
traceRelease(trace)
}
dropg()
// the scheduler will schedule us again and we'll
// finish exiting.
trace := traceAcquire()
- casgstatus(gp, _Grunning, _Grunnable)
if trace.ok() {
+ // Trace the event before the transition. It may take a
+ // stack trace, but we won't own the stack after the
+ // transition anymore.
trace.GoSched()
+ }
+ casgstatus(gp, _Grunning, _Grunnable)
+ if trace.ok() {
traceRelease(trace)
}
dropg()
applies if a program is built with GOEXPERIMENT=exectracer2. Used primarily for testing
and debugging the execution tracer.
+ tracecheckstackownership: setting tracecheckstackownership=1 enables a debug check in the
+ execution tracer to double-check stack ownership before taking a stack trace.
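+ If the check fails, the runtime throws. Used for testing and debugging the
+ execution tracer.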
+
asyncpreemptoff: asyncpreemptoff=1 disables signal-based
asynchronous goroutine preemption. This makes some loops
non-preemptible for long periods, which may delay GC and
// N.B. The execution tracer is not aware of this status
// transition and handles it specially based on the
// wait reason.
- casGToWaiting(curgp, _Grunning, waitReasonGarbageCollection)
+ casGToWaitingForGC(curgp, _Grunning, waitReasonGarbageCollection)
// Run gc on the g0 stack. We do this so that the g stack
// we're currently running on will no longer change. Cuts
// N.B. The execution tracer is not aware of this status
// transition and handles it specially based on the
// wait reason.
- casGToWaiting(gp, _Grunning, waitReasonGCWorkerActive)
+ casGToWaitingForGC(gp, _Grunning, waitReasonGCWorkerActive)
switch pp.gcMarkWorkerMode {
default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
userG := getg().m.curg
selfScan := gp == userG && readgstatus(userG) == _Grunning
if selfScan {
- casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
+ casGToWaitingForGC(userG, _Grunning, waitReasonGarbageCollectionScan)
}
// TODO: suspendG blocks (and spins) until gp
}
// gcDrainN requires the caller to be preemptible.
- casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
+ casGToWaitingForGC(gp, _Grunning, waitReasonGCAssistMarking)
// drain own cached work first in the hopes that it
// will be more cache friendly.
casgstatus(gp, old, _Gwaiting)
}
+// casGToWaitingForGC transitions gp from old to _Gwaiting, and sets the wait reason.
+// The wait reason must be a valid isWaitingForGC wait reason.
+//
+// Use this over casgstatus when possible to ensure that a waitreason is set.
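+// For wait reasons outside the isWaitingForGC set, use casGToWaiting.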
+func casGToWaitingForGC(gp *g, old uint32, reason waitReason) {
+ if !reason.isWaitingForGC() {
+ throw("casGToWaitingForGC with non-isWaitingForGC wait reason")
+ }
+ casGToWaiting(gp, old, reason)
+}
+
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
// Returns old status. Cannot call casgstatus directly, because we are racing with an
// async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
// N.B. The execution tracer is not aware of this status
// transition and handles it specially based on the
// wait reason.
- casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld)
+ casGToWaitingForGC(gp, _Grunning, waitReasonStoppingTheWorld)
stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack
casgstatus(gp, _Gwaiting, _Grunning)
})
// N.B. The execution tracer is not aware of this status
// transition and handles it specially based on the
// wait reason.
- casGToWaiting(gp, _Grunning, reason)
+ casGToWaitingForGC(gp, _Grunning, reason)
forEachPInternal(fn)
casgstatus(gp, _Gwaiting, _Grunning)
})
trace := traceAcquire()
+ if trace.ok() {
+ // Trace the event before the transition. It may take a
+ // stack trace, but we won't own the stack after the
+ // transition anymore.
+ trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
+ }
// N.B. Not using casGToWaiting here because the waitreason is
// set by park_m's caller.
casgstatus(gp, _Grunning, _Gwaiting)
if trace.ok() {
- trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
traceRelease(trace)
}
dumpgstatus(gp)
throw("bad g status")
}
- casgstatus(gp, _Grunning, _Grunnable)
if trace.ok() {
+ // Trace the event before the transition. It may take a
+ // stack trace, but we won't own the stack after the
+ // transition anymore.
if preempted {
trace.GoPreempt()
} else {
trace.GoSched()
}
+ }
+ casgstatus(gp, _Grunning, _Grunnable)
+ if trace.ok() {
traceRelease(trace)
}
func goyield_m(gp *g) {
trace := traceAcquire()
pp := gp.m.p.ptr()
- casgstatus(gp, _Grunning, _Grunnable)
if trace.ok() {
+ // Trace the event before the transition. It may take a
+ // stack trace, but we won't own the stack after the
+ // transition anymore.
trace.GoPreempt()
+ }
+ casgstatus(gp, _Grunning, _Grunnable)
+ if trace.ok() {
traceRelease(trace)
}
dropg()
if trace.ok() {
// Pretend that we were descheduled
// and then scheduled again to keep
- // the trace sane.
+ // the trace consistent.
trace.GoSched()
trace.ProcStop(gp.m.p.ptr())
traceRelease(trace)
// existing int var for that value, which may
// already have an initial value.
var debug struct {
- cgocheck int32
- clobberfree int32
- disablethp int32
- dontfreezetheworld int32
- efence int32
- gccheckmark int32
- gcpacertrace int32
- gcshrinkstackoff int32
- gcstoptheworld int32
- gctrace int32
- invalidptr int32
- madvdontneed int32 // for Linux; issue 28466
- runtimeContentionStacks atomic.Int32
- scavtrace int32
- scheddetail int32
- schedtrace int32
- tracebackancestors int32
- asyncpreemptoff int32
- harddecommit int32
- adaptivestackstart int32
- tracefpunwindoff int32
- traceadvanceperiod int32
+ cgocheck int32
+ clobberfree int32
+ disablethp int32
+ dontfreezetheworld int32
+ efence int32
+ gccheckmark int32
+ gcpacertrace int32
+ gcshrinkstackoff int32
+ gcstoptheworld int32
+ gctrace int32
+ invalidptr int32
+ madvdontneed int32 // for Linux; issue 28466
+ runtimeContentionStacks atomic.Int32
+ scavtrace int32
+ scheddetail int32
+ schedtrace int32
+ tracebackancestors int32
+ asyncpreemptoff int32
+ harddecommit int32
+ adaptivestackstart int32
+ tracefpunwindoff int32
+ traceadvanceperiod int32
+ traceCheckStackOwnership int32
// debug.malloc is used as a combined debug check
// in the malloc function and should be set
{name: "scheddetail", value: &debug.scheddetail},
{name: "schedtrace", value: &debug.schedtrace},
{name: "traceadvanceperiod", value: &debug.traceadvanceperiod},
+ {name: "tracecheckstackownership", value: &debug.traceCheckStackOwnership},
{name: "tracebackancestors", value: &debug.tracebackancestors},
{name: "tracefpunwindoff", value: &debug.tracefpunwindoff},
}
w == waitReasonSyncRWMutexLock
}
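+
+// isWaitingForGC reports whether w is a wait reason used only when a
+// goroutine enters _Gwaiting to let the GC take ownership of its stack.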
+func (w waitReason) isWaitingForGC() bool {
+ return isWaitingForGC[w]
+}
+
+// isWaitingForGC indicates that a goroutine is only entering _Gwaiting and
+// setting a waitReason because it needs to be able to let the GC take ownership
+// of its stack. In these cases, the G is always actually executing on the
+// system stack.
+//
+// TODO(mknyszek): Consider replacing this with a new dedicated G status.
+var isWaitingForGC = [len(waitReasonStrings)]bool{
+ waitReasonStoppingTheWorld: true,
+ waitReasonGCMarkTermination: true,
+ waitReasonGarbageCollection: true,
+ waitReasonGarbageCollectionScan: true,
+ waitReasonTraceGoroutineStatus: true,
+ waitReasonTraceProcStatus: true,
+ waitReasonPageTraceFlush: true,
+ waitReasonGCAssistMarking: true,
+ waitReasonGCWorkerActive: true,
+ waitReasonFlushProcCaches: true,
+}
+
var (
allm *m
gomaxprocs int32
// isShrinkStackSafe returns whether it's safe to attempt to shrink
// gp's stack. Shrinking the stack is only safe when we have precise
-// pointer maps for all frames on the stack.
+// pointer maps for all frames on the stack. The caller must hold the
+// _Gscan bit for gp or must be running gp itself.
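+// Either guarantee keeps gp's status and waitreason from changing under us.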
func isShrinkStackSafe(gp *g) bool {
// We can't copy the stack if we're in a syscall.
// The syscall might have pointers into the stack and
// often we don't have precise pointer maps for the innermost
// frames.
- //
+ if gp.syscallsp != 0 {
+ return false
+ }
// We also can't copy the stack if we're at an asynchronous
// safe-point because we don't have precise pointer maps for
// all frames.
- //
+ if gp.asyncSafePoint {
+ return false
+ }
// We also can't *shrink* the stack in the window between the
// goroutine calling gopark to park on a channel and
// gp.activeStackChans being set.
- return gp.syscallsp == 0 && !gp.asyncSafePoint && !gp.parkingOnChan.Load()
+ if gp.parkingOnChan.Load() {
+ return false
+ }
+ // We also can't copy the stack while tracing is enabled, and
+ // gp is in _Gwaiting solely to make itself available to the GC.
+ // In these cases, the G is actually executing on the system
+ // stack, and the execution tracer may want to take a stack trace
+ // of the G's stack. Note: it's safe to access gp.waitreason here.
+ // We only check this condition if we took ownership of the
+ // G with the _Gscan bit. This prevents the goroutine from transitioning,
+ // which prevents gp.waitreason from changing.
+ if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForGC() {
+ return false
+ }
+ return true
}
// Maybe shrink the stack being used by gp.
me := getg().m.curg
// We don't have to handle this G status transition because we
// already eliminated ourselves from consideration above.
- casGToWaiting(me, _Grunning, waitReasonTraceGoroutineStatus)
+ casGToWaitingForGC(me, _Grunning, waitReasonTraceGoroutineStatus)
// We need to suspend and take ownership of the G to safely read its
// goid. Note that we can't actually emit the event at this point
// because we might stop the G in a window where it's unsafe to write
mp = getg().m
gp = mp.curg
}
+
+ // Double-check that we own the stack we're about to trace.
+ if debug.traceCheckStackOwnership != 0 && gp != nil {
+ status := readgstatus(gp)
+ // If the scan bit is set, assume we're the ones that acquired it.
+ if status&_Gscan == 0 {
+ // Use the trace status to check this. There are a number of cases
+ // where a running goroutine might be in _Gwaiting, and these cases
+ // are totally fine for taking a stack trace. They're captured
+ // correctly in goStatusToTraceGoStatus.
+ switch goStatusToTraceGoStatus(status, gp.waitreason) {
+ case traceGoRunning, traceGoSyscall:
+ if getg() == gp || mp.curg == gp {
+ break
+ }
+ fallthrough
+ default:
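+ // This thread doesn't own gp's stack: gp is either running on
+ // another thread, or isn't running and we never acquired its
+ // _Gscan bit.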
+ print("runtime: gp=", unsafe.Pointer(gp), " gp.goid=", gp.goid, " status=", gStatusStrings[status], "\n")
+ throw("attempted to trace stack of a goroutine this thread does not own")
+ }
+ }
+ }
+
if gp != nil && mp == nil {
// We're getting the backtrace for a G that's not currently executing.
// It may still have an M, if it's locked to some M.
// emit an event, and we want these goroutines to appear in
// the final trace as if they're running, not blocked.
tgs = traceGoWaiting
- if status == _Gwaiting &&
- wr == waitReasonStoppingTheWorld ||
- wr == waitReasonGCMarkTermination ||
- wr == waitReasonGarbageCollection ||
- wr == waitReasonTraceProcStatus ||
- wr == waitReasonPageTraceFlush ||
- wr == waitReasonGCWorkerActive {
+ if status == _Gwaiting && wr.isWaitingForGC() {
tgs = traceGoRunning
}
case _Gdead: