From: Heschi Kreinick Date: Fri, 17 Aug 2018 20:32:08 +0000 (-0400) Subject: runtime: handle morestack system stack transition in gentraceback X-Git-Tag: go1.12beta1~1294 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ae6361e4bd78b85c284881a16b9b3f81133db1bb;p=gostls13.git runtime: handle morestack system stack transition in gentraceback gentraceback handles system stack transitions, but only when they're done by systemstack(). Handle morestack() too. I tried to do this generically but systemstack and morestack are actually *very* different functions. Most notably, systemstack returns "normally", just messes with $sp along the way. morestack never returns at all -- it calls newstack, and newstack then jumps both stacks and functions back to whoever called morestack. I couldn't think of a way to handle both of them generically. So don't. The current implementation does not include systemstack on the generated traceback. That's partly because I don't know how to find its stack frame reliably, and partly because the current structure of the code wants to do the transition before the call, not after. If we're willing to assume that morestack's stack frame is 0 size, this could probably be fixed. For posterity's sake, alternatives tried: - Have morestack put a dummy function into schedbuf, like systemstack does. This actually worked (see patchset 1) but more by a series of coincidences than by deliberate design. The biggest coincidence was that because morestack_switch was a RET and its stack was 0 size, it actually worked to jump back to it at the end of newstack -- it would just return to the caller of morestack. Way too subtle for me, and also a little slower than just jumping directly. - Put morestack's PC and SP into schedbuf, so that gentraceback could treat it like a normal function except for the changing SP. This was a terrible idea and caused newstack to reenter morestack in a completely unreasonable state. To make testing possible I did a small redesign of testCPUProfile to take a callback that defines how to check if the conditions pass to it are satisfied. This seemed better than making the syntax of the "need" strings even more complicated. Updates #25943 Change-Id: I9271a30a976f80a093a3d4d1c7e9ec226faf74b4 Reviewed-on: https://go-review.googlesource.com/126795 Run-TryBot: Heschi Kreinick TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements --- diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index 44d514393e..095972fa68 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -73,14 +73,14 @@ func cpuHog2(x int) int { } func TestCPUProfile(t *testing.T) { - testCPUProfile(t, []string{"runtime/pprof.cpuHog1"}, func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"runtime/pprof.cpuHog1"}, func(dur time.Duration) { cpuHogger(cpuHog1, &salt1, dur) }) } func TestCPUProfileMultithreaded(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) - testCPUProfile(t, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, func(dur time.Duration) { c := make(chan int) go func() { cpuHogger(cpuHog1, &salt1, dur) @@ -92,7 +92,7 @@ func TestCPUProfileMultithreaded(t *testing.T) { } func TestCPUProfileInlining(t *testing.T) { - testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) { cpuHogger(inlinedCaller, &salt1, dur) }) } @@ -130,7 +130,9 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca } } -func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { +// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need, +// as interpreted by matches. +func testCPUProfile(t *testing.T, matches matchFunc, need []string, f func(dur time.Duration)) { switch runtime.GOOS { case "darwin": switch runtime.GOARCH { @@ -169,7 +171,7 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { f(duration) StopCPUProfile() - if profileOk(t, need, prof, duration) { + if profileOk(t, need, matches, prof, duration) { return } @@ -202,7 +204,21 @@ func contains(slice []string, s string) bool { return false } -func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Duration) (ok bool) { +// stackContains matches if a function named spec appears anywhere in the stack trace. +func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, loc := range stk { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, spec) { + return true + } + } + } + return false +} + +type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool + +func profileOk(t *testing.T, need []string, matches matchFunc, prof bytes.Buffer, duration time.Duration) (ok bool) { ok = true // Check that profile is well formed and contains need. @@ -213,20 +229,9 @@ func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Dur fmt.Fprintf(&buf, "%d:", count) fprintStack(&buf, stk) samples += count - for i, name := range need { - if semi := strings.Index(name, ";"); semi > -1 { - kv := strings.SplitN(name[semi+1:], "=", 2) - if len(kv) != 2 || !contains(labels[kv[0]], kv[1]) { - continue - } - name = name[:semi] - } - for _, loc := range stk { - for _, line := range loc.Line { - if strings.Contains(line.Function.Name, name) { - have[i] += count - } - } + for i, spec := range need { + if matches(spec, count, stk, labels) { + have[i] += count } } fmt.Fprintf(&buf, "\n") @@ -377,7 +382,7 @@ func fprintStack(w io.Writer, stk []*profile.Location) { // Test that profiling of division operations is okay, especially on ARM. See issue 6681. func TestMathBigDivide(t *testing.T) { - testCPUProfile(t, nil, func(duration time.Duration) { + testCPUProfile(t, nil, nil, func(duration time.Duration) { t := time.After(duration) pi := new(big.Int) for { @@ -395,6 +400,48 @@ func TestMathBigDivide(t *testing.T) { }) } +// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace. +func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, f := range strings.Split(spec, ",") { + if !stackContains(f, count, stk, labels) { + return false + } + } + return true +} + +func TestMorestack(t *testing.T) { + testCPUProfile(t, stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, func(duration time.Duration) { + t := time.After(duration) + c := make(chan bool) + for { + go func() { + growstack1() + c <- true + }() + select { + case <-t: + return + case <-c: + } + } + }) +} + +//go:noinline +func growstack1() { + growstack() +} + +//go:noinline +func growstack() { + var buf [8 << 10]byte + use(buf) +} + +//go:noinline +func use(x [8 << 10]byte) {} + func TestBlockProfile(t *testing.T) { type TestCase struct { name string @@ -848,8 +895,25 @@ func TestEmptyCallStack(t *testing.T) { } } +// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key +// and value and has funcname somewhere in the stack. +func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + semi := strings.Index(spec, ";") + if semi == -1 { + panic("no semicolon in key/value spec") + } + kv := strings.SplitN(spec[semi+1:], "=", 2) + if len(kv) != 2 { + panic("missing = in key/value spec") + } + if !contains(labels[kv[0]], kv[1]) { + return false + } + return stackContains(spec[:semi], count, stk, labels) +} + func TestCPUProfileLabel(t *testing.T) { - testCPUProfile(t, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) { + testCPUProfile(t, stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) { Do(context.Background(), Labels("key", "value"), func(context.Context) { cpuHogger(cpuHog1, &salt1, dur) }) @@ -860,7 +924,7 @@ func TestLabelRace(t *testing.T) { // Test the race detector annotations for synchronization // between settings labels and consuming them from the // profile. - testCPUProfile(t, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) { + testCPUProfile(t, stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) { start := time.Now() var wg sync.WaitGroup for time.Since(start) < dur { diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 78589f5ea3..8370fd7593 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -197,16 +197,29 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in // Found an actual function. // Derive frame pointer and link register. if frame.fp == 0 { - // We want to jump over the systemstack switch. If we're running on the - // g0, this systemstack is at the top of the stack. - // if we're not on g0 or there's a no curg, then this is a regular call. - sp := frame.sp - if flags&_TraceJumpStack != 0 && f.funcID == funcID_systemstack && gp == gp.m.g0 && gp.m.curg != nil { - sp = gp.m.curg.sched.sp - frame.sp = sp - cgoCtxt = gp.m.curg.cgoCtxt + // Jump over system stack transitions. If we're on g0 and there's a user + // goroutine, try to jump. Otherwise this is a regular call. + if flags&_TraceJumpStack != 0 && gp == gp.m.g0 && gp.m.curg != nil { + switch f.funcID { + case funcID_morestack: + // morestack does not return normally -- newstack() + // gogo's to curg.sched. Match that. + // This keeps morestack() from showing up in the backtrace, + // but that makes some sense since it'll never be returned + // to. + frame.pc = gp.m.curg.sched.pc + frame.fn = findfunc(frame.pc) + f = frame.fn + frame.sp = gp.m.curg.sched.sp + cgoCtxt = gp.m.curg.cgoCtxt + case funcID_systemstack: + // systemstack returns normally, so just follow the + // stack transition. + frame.sp = gp.m.curg.sched.sp + cgoCtxt = gp.m.curg.cgoCtxt + } } - frame.fp = sp + uintptr(funcspdelta(f, frame.pc, &cache)) + frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc, &cache)) if !usesLR { // On x86, call instruction pushes return PC before entering new function. frame.fp += sys.RegSize