From 9eba17ff90963cdbbe47af887fb3152c0c4d1ebb Mon Sep 17 00:00:00 2001
From: Austin Clements
+ When printing very deep stacks, the runtime now prints the first 50 + (innermost) frames followed by the bottom 50 (outermost) frames, + rather than just printing the first 100 frames. This makes it easier + to see how deeply recursive stacks started, and is especially + valuable for debugging stack overflows. +
+diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 31d32eabeb..367362b672 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -47,6 +47,9 @@ var NetpollGenericInit = netpollGenericInit var Memmove = memmove var MemclrNoHeapPointers = memclrNoHeapPointers +const TracebackInnerFrames = tracebackInnerFrames +const TracebackOuterFrames = tracebackOuterFrames + var LockPartialOrder = lockPartialOrder type LockRank lockRank diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 533cf20838..76ed8966dc 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4356,7 +4356,7 @@ func saveAncestors(callergp *g) *[]ancestorInfo { ancestors := make([]ancestorInfo, n) copy(ancestors[1:], callerAncestors) - var pcs [_TracebackMaxFrames]uintptr + var pcs [tracebackInnerFrames]uintptr npcs := gcallers(callergp, 0, pcs[:]) ipcs := make([]uintptr, npcs) copy(ipcs, pcs[:]) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 2e98f895a3..a9706a642e 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1049,9 +1049,6 @@ type ancestorInfo struct { gopc uintptr // pc of go statement that created this goroutine } -// The maximum number of frames we print for a traceback -const _TracebackMaxFrames = 100 - // A waitReason explains why a goroutine has been stopped. // See gopark. Do not re-use waitReasons, add new ones. type waitReason uint8 diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 9dfa97f658..eb5e67eb81 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -21,6 +21,17 @@ import ( const usesLR = sys.MinFrameSize > 0 +const ( + // tracebackInnerFrames is the number of innermost frames to print in a + // stack trace. The total maximum frames is tracebackInnerFrames + + // tracebackOuterFrames. + tracebackInnerFrames = 50 + + // tracebackOuterFrames is the number of outermost frames to print in a + // stack trace. + tracebackOuterFrames = 50 +) + // unwindFlags control the behavior of various unwinders. type unwindFlags uint8 @@ -812,18 +823,80 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) { flags &^= unwindTrap } - // Print traceback. By default, omits runtime frames. - // If that means we print nothing at all, repeat forcing all frames printed. + // Print traceback. + // + // We print the first tracebackInnerFrames frames, and the last + // tracebackOuterFrames frames. There are many possible approaches to this. + // There are various complications to this: + // + // - We'd prefer to walk the stack once because in really bad situations + // traceback may crash (and we want as much output as possible) or the stack + // may be changing. + // + // - Each physical frame can represent several logical frames, so we might + // have to pause in the middle of a physical frame and pick up in the middle + // of a physical frame. + // + // - The cgo symbolizer can expand a cgo PC to more than one logical frame, + // and involves juggling state on the C side that we don't manage. Since its + // expansion state is managed on the C side, we can't capture the expansion + // state part way through, and because the output strings are managed on the + // C side, we can't capture the output. Thus, our only choice is to replay a + // whole expansion, potentially discarding some of it. + // + // Rejected approaches: + // + // - Do two passes where the first pass just counts and the second pass does + // all the printing. This is undesireable if the stack is corrupted or changing + // because we won't see a partial stack if we panic. + // + // - Keep a ring buffer of the last N logical frames and use this to print + // the bottom frames once we reach the end of the stack. This works, but + // requires keeping a surprising amount of state on the stack, and we have + // to run the cgo symbolizer twiceâonce to count frames, and a second to + // print themâsince we can't retain the strings it returns. + // + // Instead, we print the outer frames, and if we reach that limit, we clone + // the unwinder, count the remaining frames, and then skip forward and + // finish printing from the clone. This makes two passes over the outer part + // of the stack, but the single pass over the inner part ensures that's + // printed immediately and not revisited. It keeps minimal state on the + // stack. And through a combination of skip counts and limits, we can do all + // of the steps we need with a single traceback printer implementation. + // + // We could be more lax about exactly how many frames we print, for example + // always stopping and resuming on physical frame boundaries, or at least + // cgo expansion boundaries. It's not clear that's much simpler. flags |= unwindPrintErrors var u unwinder - u.initAt(pc, sp, lr, gp, flags) - n := traceback2(&u, false) - if n == 0 { + tracebackWithRuntime := func(showRuntime bool) int { + const maxInt int = 0x7fffffff u.initAt(pc, sp, lr, gp, flags) - n = traceback2(&u, true) + n, lastN := traceback2(&u, showRuntime, 0, tracebackInnerFrames) + if n < tracebackInnerFrames { + // We printed the whole stack. + return n + } + // Clone the unwinder and figure out how many frames are left. This + // count will include any logical frames already printed for u's current + // physical frame. + u2 := u + remaining, _ := traceback2(&u, showRuntime, maxInt, 0) + elide := remaining - lastN - tracebackOuterFrames + if elide > 0 { + print("...", elide, " frames elided...\n") + traceback2(&u2, showRuntime, lastN+elide, tracebackOuterFrames) + } else if elide <= 0 { + // There are tracebackOuterFrames or fewer frames left to print. + // Just print the rest of the stack. + traceback2(&u2, showRuntime, lastN, tracebackOuterFrames) + } + return n } - if n == _TracebackMaxFrames { - print("...additional frames elided...\n") + // By default, omits runtime frames. If that means we print nothing at all, + // repeat forcing all frames printed. + if tracebackWithRuntime(false) == 0 { + tracebackWithRuntime(true) } printcreatedby(gp) @@ -835,15 +908,38 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) { } } -func traceback2(u *unwinder, showRuntime bool) int { +// traceback2 prints a stack trace starting at u. It skips the first "skip" +// logical frames, after which it prints at most "max" logical frames. It +// returns n, which is the number of logical frames skipped and printed, and +// lastN, which is the number of logical frames skipped or printed just in the +// phyiscal frame that u references. +func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) { + // commitFrame commits to a logical frame and returns whether this frame + // should be printed and whether iteration should stop. + commitFrame := func() (pr, stop bool) { + if skip == 0 && max == 0 { + // Stop + return false, true + } + n++ + lastN++ + if skip > 0 { + // Skip + skip-- + return false, false + } + // Print + max-- + return true, false + } + gp := u.g.ptr() level, _, _ := gotraceback() - n := 0 - const max = _TracebackMaxFrames var cgoBuf [32]uintptr - for ; n < max && u.valid(); u.next() { + for ; u.valid(); u.next() { + lastN = 0 f := u.frame.fn - for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); n < max && uf.valid(); uf = iu.next(uf) { + for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); uf.valid(); uf = iu.next(uf) { sf := iu.srcFunc(uf) callee := u.calleeFuncID u.calleeFuncID = sf.funcID @@ -851,6 +947,12 @@ func traceback2(u *unwinder, showRuntime bool) int { continue } + if pr, stop := commitFrame(); stop { + return + } else if !pr { + continue + } + name := sf.name() file, line := iu.fileLine(uf) if name == "runtime.gopanic" { @@ -878,34 +980,39 @@ func traceback2(u *unwinder, showRuntime bool) int { } } print("\n") - n++ } // Print cgo frames. if cgoN := u.cgoCallers(cgoBuf[:]); cgoN > 0 { var arg cgoSymbolizerArg anySymbolized := false + stop := false for _, pc := range cgoBuf[:cgoN] { - if n >= max { - break - } if cgoSymbolizer == nil { - print("non-Go function at pc=", hex(pc), "\n") + if pr, stop := commitFrame(); stop { + break + } else if pr { + print("non-Go function at pc=", hex(pc), "\n") + } } else { - c := printOneCgoTraceback(pc, max-n, &arg) - n += c - 1 // +1 a few lines down + stop = printOneCgoTraceback(pc, commitFrame, &arg) anySymbolized = true + if stop { + break + } } - n++ } if anySymbolized { // Free symbolization state. arg.pc = 0 callCgoSymbolizer(&arg) } + if stop { + return + } } } - return n + return n, 0 } // printAncestorTraceback prints the traceback of the given ancestor. @@ -918,7 +1025,7 @@ func printAncestorTraceback(ancestor ancestorInfo) { printAncestorTracebackFuncInfo(f, pc) } } - if len(ancestor.pcs) == _TracebackMaxFrames { + if len(ancestor.pcs) == tracebackInnerFrames { print("...additional frames elided...\n") } // Show what created goroutine, except main goroutine (goid 1). @@ -1405,12 +1512,13 @@ func printCgoTraceback(callers *cgoCallers) { return } + commitFrame := func() (pr, stop bool) { return true, false } var arg cgoSymbolizerArg for _, c := range callers { if c == 0 { break } - printOneCgoTraceback(c, 0x7fffffff, &arg) + printOneCgoTraceback(c, commitFrame, &arg) } arg.pc = 0 callCgoSymbolizer(&arg) @@ -1418,11 +1526,16 @@ func printCgoTraceback(callers *cgoCallers) { // printOneCgoTraceback prints the traceback of a single cgo caller. // This can print more than one line because of inlining. -// Returns the number of frames printed. -func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int { - c := 0 +// It returns the "stop" result of commitFrame. +func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *cgoSymbolizerArg) bool { arg.pc = pc - for c <= max { + for { + if pr, stop := commitFrame(); stop { + return true + } else if !pr { + continue + } + callCgoSymbolizer(arg) if arg.funcName != nil { // Note that we don't print any argument @@ -1437,12 +1550,10 @@ func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int { print(gostringnocopy(arg.file), ":", arg.lineno, " ") } print("pc=", hex(pc), "\n") - c++ if arg.more == 0 { - break + return false } } - return c } // callCgoSymbolizer calls the cgoSymbolizer function. diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go index a47ddb61c5..4dd1d4bae9 100644 --- a/src/runtime/traceback_test.go +++ b/src/runtime/traceback_test.go @@ -9,8 +9,10 @@ import ( "fmt" "internal/abi" "internal/testenv" + "regexp" "runtime" "runtime/debug" + "strconv" "strings" "sync" "testing" @@ -150,6 +152,113 @@ func ttiExcluded3() *ttiResult { var testTracebackArgsBuf [1000]byte +func TestTracebackElision(t *testing.T) { + // Test printing exactly the maximum number of frames to make sure we don't + // print any "elided" message, eliding exactly 1 so we have to pick back up + // in the paused physical frame, and eliding 10 so we have to advance the + // physical frame forward. + for _, elided := range []int{0, 1, 10} { + t.Run(fmt.Sprintf("elided=%d", elided), func(t *testing.T) { + n := elided + runtime.TracebackInnerFrames + runtime.TracebackOuterFrames + + // Start a new goroutine so we have control over the whole stack. + stackChan := make(chan string) + go tteStack(n, stackChan) + stack := <-stackChan + tb := parseTraceback1(t, stack) + + // Check the traceback. + i := 0 + for i < n { + if len(tb.frames) == 0 { + t.Errorf("traceback ended early") + break + } + fr := tb.frames[0] + if i == runtime.TracebackInnerFrames && elided > 0 { + // This should be an "elided" frame. + if fr.elided != elided { + t.Errorf("want %d frames elided", elided) + break + } + i += fr.elided + } else { + want := fmt.Sprintf("runtime_test.tte%d", (i+1)%5) + if i == 0 { + want = "runtime/debug.Stack" + } else if i == n-1 { + want = "runtime_test.tteStack" + } + if fr.funcName != want { + t.Errorf("want %s, got %s", want, fr.funcName) + break + } + i++ + } + tb.frames = tb.frames[1:] + } + if !t.Failed() && len(tb.frames) > 0 { + t.Errorf("got %d more frames than expected", len(tb.frames)) + } + if t.Failed() { + t.Logf("traceback diverged at frame %d", i) + off := len(stack) + if len(tb.frames) > 0 { + off = tb.frames[0].off + } + t.Logf("traceback before error:\n%s", stack[:off]) + t.Logf("traceback after error:\n%s", stack[off:]) + } + }) + } +} + +// tteStack creates a stack of n logical frames and sends the traceback to +// stack. It cycles through 5 logical frames per physical frame to make it +// unlikely that any part of the traceback will end on a physical boundary. +func tteStack(n int, stack chan<- string) { + n-- // Account for this frame + // This is basically a Duff's device for starting the inline stack in the + // right place so we wind up at tteN when n%5=N. + switch n % 5 { + case 0: + stack <- tte0(n) + case 1: + stack <- tte1(n) + case 2: + stack <- tte2(n) + case 3: + stack <- tte3(n) + case 4: + stack <- tte4(n) + default: + panic("unreachable") + } +} +func tte0(n int) string { + return tte4(n - 1) +} +func tte1(n int) string { + return tte0(n - 1) +} +func tte2(n int) string { + // tte2 opens n%5 == 2 frames. It's also the base case of the recursion, + // since we can open no fewer than two frames to call debug.Stack(). + if n < 2 { + panic("bad n") + } + if n == 2 { + return string(debug.Stack()) + } + return tte1(n - 1) +} +func tte3(n int) string { + return tte2(n - 1) +} +func tte4(n int) string { + return tte3(n - 1) +} + func TestTracebackArgs(t *testing.T) { if *flagQuick { t.Skip("-quick") @@ -586,37 +695,50 @@ type tbFrame struct { funcName string args string inlined bool + + // elided is set to the number of frames elided, and the other fields are + // set to the zero value. + elided int + + off int // byte offset in the traceback text of this frame } // parseTraceback parses a printed traceback to make it easier for tests to // check the result. func parseTraceback(t *testing.T, tb string) []*traceback { - lines := strings.Split(tb, "\n") - nLines := len(lines) + //lines := strings.Split(tb, "\n") + //nLines := len(lines) + off := 0 + lineNo := 0 fatal := func(f string, args ...any) { - lineNo := nLines - len(lines) + 1 msg := fmt.Sprintf(f, args...) t.Fatalf("%s (line %d):\n%s", msg, lineNo, tb) } parseFrame := func(funcName, args string) *tbFrame { // Consume file/line/etc - if len(lines) == 0 || !strings.HasPrefix(lines[0], "\t") { + if !strings.HasPrefix(tb, "\t") { fatal("missing source line") } - lines = lines[1:] + _, tb, _ = strings.Cut(tb, "\n") + lineNo++ inlined := args == "..." - return &tbFrame{funcName, args, inlined} + return &tbFrame{funcName: funcName, args: args, inlined: inlined, off: off} } + var elidedRe = regexp.MustCompile(`^\.\.\.([0-9]+) frames elided\.\.\.$`) var tbs []*traceback var cur *traceback - for len(lines) > 0 { - line := lines[0] - lines = lines[1:] + tbLen := len(tb) + for len(tb) > 0 { + var line string + off = tbLen - len(tb) + line, tb, _ = strings.Cut(tb, "\n") + lineNo++ switch { case strings.HasPrefix(line, "goroutine "): cur = &traceback{} tbs = append(tbs, cur) case line == "": + // Separator between goroutines cur = nil case line[0] == '\t': fatal("unexpected indent") @@ -631,6 +753,12 @@ func parseTraceback(t *testing.T, tb string) []*traceback { } frame := parseFrame(funcName, args) cur.frames = append(cur.frames, frame) + case elidedRe.MatchString(line): + // "...N frames elided..." + nStr := elidedRe.FindStringSubmatch(line) + n, _ := strconv.Atoi(nStr[1]) + frame := &tbFrame{elided: n} + cur.frames = append(cur.frames, frame) } } return tbs -- 2.48.1