From: Felix Geisendörfer
Date: Fri, 29 Mar 2024 18:59:47 +0000 (+0100)
Subject: runtime: move profiling pc buffers to m
X-Git-Tag: go1.23rc1~426
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2141315251da47745c8f649c01e598a19bd68897;p=gostls13.git

runtime: move profiling pc buffers to m

Move profiling pc buffers from being stack allocated to an m field. This
is motivated by the next patch, which will increase the default stack
depth to 128, which might lead to undesirable stack growth for
goroutines that produce profiling events.

Additionally, this change paves the way to make the stack depth
configurable via GODEBUG.

Change-Id: Ifa407f899188e2c7c0a81de92194fdb627cb4b36
Reviewed-on: https://go-review.googlesource.com/c/go/+/574699
Reviewed-by: Michael Knyszek
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Michael Pratt
---
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 96c4761bc8..1df9006011 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1404,7 +1404,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
 		throw("profilealloc called without a P or outside bootstrapping")
 	}
 	c.nextSample = nextSample()
-	mProf_Malloc(x, size)
+	mProf_Malloc(mp, x, size)
 }
 
 // nextSample returns the next sampling point for heap profiling. The goal is
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 26b7d78283..d4d5e285fd 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -422,15 +422,13 @@ func mProf_PostSweep() {
 }
 
 // Called by malloc to record a profiled block.
-func mProf_Malloc(p unsafe.Pointer, size uintptr) {
-	var stk [maxStack]uintptr
-	nstk := callers(4, stk[:])
-
+func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
+	nstk := callers(4, mp.profStack)
 	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
 
-	b := stkbucket(memProfile, size, stk[:nstk], true)
-	mp := b.mp()
-	mpc := &mp.future[index]
+	b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
+	mr := b.mp()
+	mpc := &mr.future[index]
 
 	lock(&profMemFutureLock[index])
 	mpc.allocs++
@@ -505,16 +503,17 @@ func blocksampled(cycles, rate int64) bool {
 }
 
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
-	gp := getg()
 	var nstk int
-	var stk [maxStack]uintptr
+	gp := getg()
+	mp := acquirem() // we must not be preempted while accessing profstack
 	if gp.m.curg == nil || gp.m.curg == gp {
-		nstk = callers(skip, stk[:])
+		nstk = callers(skip, mp.profStack)
 	} else {
-		nstk = gcallers(gp.m.curg, skip, stk[:])
+		nstk = gcallers(gp.m.curg, skip, mp.profStack)
 	}
 
-	saveBlockEventStack(cycles, rate, stk[:nstk], which)
+	saveBlockEventStack(cycles, rate, mp.profStack[:nstk], which)
+	releasem(mp)
 }
 
 // lockTimer assists with profiling contention on runtime-internal locks.
@@ -613,12 +612,12 @@ func (lt *lockTimer) end() {
 }
 
 type mLockProfile struct {
-	waitTime   atomic.Int64      // total nanoseconds spent waiting in runtime.lockWithRank
-	stack      [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank
-	pending    uintptr           // *mutex that experienced contention (to be traceback-ed)
-	cycles     int64             // cycles attributable to "pending" (if set), otherwise to "stack"
-	cyclesLost int64             // contention for which we weren't able to record a call stack
-	disabled   bool              // attribute all time to "lost"
+	waitTime   atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank
+	stack      []uintptr    // stack that experienced contention in runtime.lockWithRank
+	pending    uintptr      // *mutex that experienced contention (to be traceback-ed)
+	cycles     int64        // cycles attributable to "pending" (if set), otherwise to "stack"
+	cyclesLost int64        // contention for which we weren't able to record a call stack
+	disabled   bool         // attribute all time to "lost"
 }
 
 func (prof *mLockProfile) recordLock(cycles int64, l *mutex) {
@@ -703,7 +702,7 @@ func (prof *mLockProfile) captureStack() {
 	systemstack(func() {
 		var u unwinder
 		u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
-		nstk = tracebackPCs(&u, skip, prof.stack[:])
+		nstk = tracebackPCs(&u, skip, prof.stack)
 	})
 	if nstk < len(prof.stack) {
 		prof.stack[nstk] = 0
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 161d14e562..7a1c957822 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -922,6 +922,16 @@ func mcommoninit(mp *m, id int64) {
 	if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
 		mp.cgoCallers = new(cgoCallers)
 	}
+	mProfStackInit(mp)
+}
+
+// mProfStackInit is used to eagerly initialize stack trace buffers for
+// profiling. Lazy allocation would have to deal with reentrancy issues in
+// malloc and runtime locks for mLockProfile.
+// TODO(mknyszek): Implement lazy allocation if this becomes a problem.
+func mProfStackInit(mp *m) {
+	mp.profStack = make([]uintptr, maxStack)
+	mp.mLockProfile.stack = make([]uintptr, maxStack)
 }
 
 func (mp *m) becomeSpinning() {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 0093a6ddb9..2ce9e8d0a2 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -599,6 +599,7 @@ type m struct {
 	nextwaitm     muintptr    // next m waiting for lock
 
 	mLockProfile mLockProfile // fields relating to runtime.lock contention
+	profStack    []uintptr    // used for memory/block/mutex stack traces
 
 	// wait* are used to carry arguments from gopark into park_m, because
 	// there's no stack to put them on. That is their sole purpose.
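
The program below is a small, self-contained sketch (not part of the patch above) illustrating the pattern the change applies inside the runtime: allocate a reusable PC buffer once per worker at setup time, rather than placing a [maxStack]uintptr array on the stack for every profiling event. The worker, newWorker, recordEvent, and maxStackDepth names are hypothetical, and runtime.Callers stands in for the runtime-internal callers/gcallers helpers; the runtime additionally pins the M with acquirem/releasem while its buffer is in use, which an ordinary program does not need.

package main

import (
	"fmt"
	"runtime"
)

// maxStackDepth plays the role of the runtime's maxStack constant
// (which the follow-up patch raises to 128 by default).
const maxStackDepth = 128

// worker is a stand-in for the runtime's m: it owns a reusable buffer
// of program counters instead of allocating one per event.
type worker struct {
	profStack []uintptr
}

// newWorker allocates the buffer eagerly, mirroring mProfStackInit.
func newWorker() *worker {
	return &worker{profStack: make([]uintptr, maxStackDepth)}
}

// recordEvent captures the current call stack into the worker's buffer and
// returns only the filled prefix, much like saveblockevent handing
// mp.profStack[:nstk] to saveBlockEventStack. The returned slice aliases
// the reusable buffer, so callers must consume or copy it before the next
// recordEvent call.
func (w *worker) recordEvent(skip int) []uintptr {
	n := runtime.Callers(skip+2, w.profStack) // +2 skips Callers and recordEvent
	return w.profStack[:n]
}

func main() {
	w := newWorker()
	frames := runtime.CallersFrames(w.recordEvent(0))
	for {
		frame, more := frames.Next()
		fmt.Println(frame.Function)
		if !more {
			break
		}
	}
}

Allocating the buffer at construction mirrors mProfStackInit being called from mcommoninit: the recording path stays allocation-free, which in the runtime also sidesteps reentrancy into malloc and into the runtime locks used by mLockProfile.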