// Note that it's only used internally as a guard against
// wildly out-of-bounds slicing of the PCs that come after
// a bucket struct, and it could increase in the future.
- // The "+ 1" is to account for the first stack entry being
+ // The term "1" accounts for the first stack entry being
// taken up by a "skip" sentinel value for profilers which
// defer inline frame expansion until the profile is reported.
- maxStack = 32 + 1
+ // The term "maxSkip" is for frame pointer unwinding, where we
+ // want to end up with maxLogicalStack frames but will discard
+ // some "physical" frames to account for skipping.
+ maxStack = 1 + maxSkip + maxLogicalStack
+
+ // maxLogicalStack is the maximum number of frames of a call stack
+ // to encode in a profile. This counts "logical" frames, which
+ // include inlined frames. We may record more than this many
+ // "physical" frames when using frame pointer unwinding to account
+ // for deferred handling of skipping frames & inline expansion.
+ maxLogicalStack = 32
+
+ // maxSkip accounts for deferred inline expansion
+ // when using frame pointer unwinding. We record the stack
+ // with "physical" frame pointers but handle skipping "logical"
+ // frames at some point after collecting the stack. So
+ // we need extra space in order to avoid getting fewer than the
+ // desired maximum number of frames after expansion.
+ // This should be at least as large as the largest skip value
+ // used for profiling; otherwise stacks may be truncated inconsistently.
+ maxSkip = 5
)
type bucketType int
// skip should be positive if this event is recorded from the current stack
// (e.g. when this is not called from a system stack)
func saveblockevent(cycles, rate int64, skip int, which bucketType) {
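+ // A skip larger than maxSkip could leave the recorded stack truncated
+ // inconsistently, so treat it as a bug in the caller.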
+ if skip > maxSkip {
+ print("requested skip=", skip)
+ throw("invalid skip value")
+ }
+
gp := getg()
mp := acquirem() // we must not be preempted while accessing profstack
nstk := 1
}
}
}
+
+func TestProfilerStackDepth(t *testing.T) {
+ // Disable sampling, otherwise it's difficult to assert anything.
+ oldMemRate := runtime.MemProfileRate
+ runtime.MemProfileRate = 1
+ runtime.SetBlockProfileRate(1)
+ oldMutexRate := runtime.SetMutexProfileFraction(1)
+ t.Cleanup(func() {
+ runtime.MemProfileRate = oldMemRate
+ runtime.SetBlockProfileRate(0)
+ runtime.SetMutexProfileFraction(oldMutexRate)
+ })
+
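+ // depth matches the runtime's maxLogicalStack, the deepest call stack
+ // the profilers will record.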
+ const depth = 32
+ go produceProfileEvents(t, depth)
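+ // goroutineDeep is the last helper produceProfileEvents runs, so once it is
+ // blocked, every profile event has been produced.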
+ awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1)
+
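+ // prefix lists the innermost frames expected at the top of the matching
+ // stack for each profile type.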
+ tests := []struct {
+ profiler string
+ prefix []string
+ }{
+ {"heap", []string{"runtime/pprof.allocDeep"}},
+ {"block", []string{"runtime.chanrecv1", "runtime/pprof.blockChanDeep"}},
+ {"mutex", []string{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexDeep"}},
+ {"goroutine", []string{"runtime.gopark", "runtime.chanrecv", "runtime.chanrecv1", "runtime/pprof.goroutineDeep"}},
+ }
+
+ for _, test := range tests {
+ t.Run(test.profiler, func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Lookup(test.profiler).WriteTo(&buf, 0); err != nil {
+ t.Fatalf("failed to write heap profile: %v", err)
+ }
+ p, err := profile.Parse(&buf)
+ if err != nil {
+ t.Fatalf("failed to parse heap profile: %v", err)
+ }
+ t.Logf("Profile = %v", p)
+
+ stks := stacks(p)
+ var stk []string
+ for _, s := range stks {
+ if hasPrefix(s, test.prefix) {
+ stk = s
+ break
+ }
+ }
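+ if stk == nil {
+ t.Fatalf("no stack found with prefix %v in %s profile", test.prefix, test.profiler)
+ }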
+ if len(stk) != depth {
+ t.Fatalf("want stack depth = %d, got %d", depth, len(stk))
+ }
+
+ if rootFn, wantFn := stk[depth-1], "runtime/pprof.produceProfileEvents"; rootFn != wantFn {
+ t.Fatalf("want stack stack root %s, got %v", wantFn, rootFn)
+ }
+ })
+ }
+}
+
+func hasPrefix(stk []string, prefix []string) bool {
+ if len(prefix) > len(stk) {
+ return false
+ }
+ for i := range prefix {
+ if stk[i] != prefix[i] {
+ return false
+ }
+ }
+ return true
+}
+
+// ensure that stack records are valid map keys (comparable)
+var _ = map[runtime.MemProfileRecord]struct{}{}
+var _ = map[runtime.StackRecord]struct{}{}
+
+ // allocDeep recurses until n frames of allocDeep are on the stack, then
+ // allocates 1 MiB of memory.
+func allocDeep(n int) {
+ if n > 1 {
+ allocDeep(n - 1)
+ return
+ }
+ memSink = make([]byte, 1<<20)
+}
+
+// blockChanDeep produces a block profile event at stack depth n, including the
+// caller.
+func blockChanDeep(t *testing.T, n int) {
+ if n > 1 {
+ blockChanDeep(t, n-1)
+ return
+ }
+ ch := make(chan struct{})
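+ // The helper goroutine waits until this goroutine is parked in the channel
+ // receive below before sending, so a block event is reliably recorded.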
+ go func() {
+ awaitBlockedGoroutine(t, "chan receive", "blockChanDeep", 1)
+ ch <- struct{}{}
+ }()
+ <-ch
+}
+
+// blockMutexDeep produces a block profile event at stack depth n, including the
+// caller.
+func blockMutexDeep(t *testing.T, n int) {
+ if n > 1 {
+ blockMutexDeep(t, n-1)
+ return
+ }
+ var mu sync.Mutex
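+ // The helper locks mu and then blocks on a second Lock. Once it is parked,
+ // the Unlock below releases it, and the runtime attributes the contention
+ // to this goroutine's (deep) Unlock stack.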
+ go func() {
+ mu.Lock()
+ mu.Lock()
+ }()
+ awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutexDeep", 1)
+ mu.Unlock()
+}
+
+ // goroutineDeep blocks at stack depth n, including the caller, until the
+ // test is finished.
+func goroutineDeep(t *testing.T, n int) {
+ if n > 1 {
+ goroutineDeep(t, n-1)
+ return
+ }
+ wait := make(chan struct{}, 1)
+ t.Cleanup(func() {
+ wait <- struct{}{}
+ })
+ <-wait
+}
+
+// produceProfileEvents produces pprof events at the given stack depth and then
+// blocks in goroutineDeep until the test completes. The stack traces are
+// guaranteed to have exactly the desired depth with produceProfileEvents as
+ // their root frame, which is what TestProfilerStackDepth expects.
+func produceProfileEvents(t *testing.T, depth int) {
+ allocDeep(depth - 1)       // -1 for produceProfileEvents
+ blockChanDeep(t, depth-2)  // -2 for produceProfileEvents, chanrecv1
+ blockMutexDeep(t, depth-2) // -2 for produceProfileEvents, Unlock
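+ // Drop the reference and force a GC so the allocation made by allocDeep
+ // is flushed into the heap profile.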
+ memSink = nil
+ runtime.GC()
+ goroutineDeep(t, depth-4) // -4 for produceProfileEvents, chanrecv1, chanrecv, gopark
+}