From 9eba17ff90963cdbbe47af887fb3152c0c4d1ebb Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Mon, 13 Mar 2023 14:02:16 -0400
Subject: [PATCH] runtime: for deep stacks, print both the top 50 and bottom 50
 frames

This is relatively easy using the new traceback iterator.

Ancestor tracebacks are now limited to 50 frames. We could keep that
at 100, but the fact that it used 100 before seemed arbitrary and
unnecessary.

Fixes #7181
Updates #54466

Change-Id: If693045881d84848f17e568df275a5105b6f1cb0
Reviewed-on: https://go-review.googlesource.com/c/go/+/475960
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 doc/go1.21.html               |   8 ++
 src/runtime/export_test.go    |   3 +
 src/runtime/proc.go           |   2 +-
 src/runtime/runtime2.go       |   3 -
 src/runtime/traceback.go      | 173 ++++++++++++++++++++++++++++------
 src/runtime/traceback_test.go | 146 ++++++++++++++++++++++++++--
 6 files changed, 291 insertions(+), 44 deletions(-)
diff --git a/doc/go1.21.html b/doc/go1.21.html
index 0af9d373af..38678a93c2 100644
--- a/doc/go1.21.html
+++ b/doc/go1.21.html
@@ -49,6 +49,14 @@ Do not send CLs removing the interior tags from such phrases.
   TODO: complete this section, or delete if not needed
 </p>
 
+<p><!-- https://go.dev/issue/7181 -->
+  When printing very deep stacks, the runtime now prints the first 50
+  (innermost) frames followed by the bottom 50 (outermost) frames,
+  rather than just printing the first 100 frames. This makes it easier
+  to see how deeply recursive stacks started, and is especially
+  valuable for debugging stack overflows.
+</p>
+
 <h2 id="compiler">Compiler</h2>
 
 <p>
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 31d32eabeb..367362b672 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -47,6 +47,9 @@ var NetpollGenericInit = netpollGenericInit
 var Memmove = memmove
 var MemclrNoHeapPointers = memclrNoHeapPointers
 
+const TracebackInnerFrames = tracebackInnerFrames
+const TracebackOuterFrames = tracebackOuterFrames
+
 var LockPartialOrder = lockPartialOrder
 
 type LockRank lockRank
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 533cf20838..76ed8966dc 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4356,7 +4356,7 @@ func saveAncestors(callergp *g) *[]ancestorInfo {
 	ancestors := make([]ancestorInfo, n)
 	copy(ancestors[1:], callerAncestors)
 
-	var pcs [_TracebackMaxFrames]uintptr
+	var pcs [tracebackInnerFrames]uintptr
 	npcs := gcallers(callergp, 0, pcs[:])
 	ipcs := make([]uintptr, npcs)
 	copy(ipcs, pcs[:])
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2e98f895a3..a9706a642e 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -1049,9 +1049,6 @@ type ancestorInfo struct {
 	gopc uintptr   // pc of go statement that created this goroutine
 }
 
-// The maximum number of frames we print for a traceback
-const _TracebackMaxFrames = 100
-
 // A waitReason explains why a goroutine has been stopped.
 // See gopark. Do not re-use waitReasons, add new ones.
 type waitReason uint8
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 9dfa97f658..eb5e67eb81 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -21,6 +21,17 @@ import (
 
 const usesLR = sys.MinFrameSize > 0
 
+const (
+	// tracebackInnerFrames is the number of innermost frames to print in a
+	// stack trace. The total maximum frames is tracebackInnerFrames +
+	// tracebackOuterFrames.
+	tracebackInnerFrames = 50
+
+	// tracebackOuterFrames is the number of outermost frames to print in a
+	// stack trace.
+	tracebackOuterFrames = 50
+)
+
 // unwindFlags control the behavior of various unwinders.
 type unwindFlags uint8
 
@@ -812,18 +823,80 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) {
 		flags &^= unwindTrap
 	}
 
-	// Print traceback. By default, omits runtime frames.
-	// If that means we print nothing at all, repeat forcing all frames printed.
+	// Print traceback.
+	//
+	// We print the first tracebackInnerFrames frames, and the last
+	// tracebackOuterFrames frames. There are many possible approaches to this.
+	// There are various complications to this:
+	//
+	// - We'd prefer to walk the stack once because in really bad situations
+	//   traceback may crash (and we want as much output as possible) or the stack
+	//   may be changing.
+	//
+	// - Each physical frame can represent several logical frames, so we might
+	//   have to pause in the middle of a physical frame and pick up in the middle
+	//   of a physical frame.
+	//
+	// - The cgo symbolizer can expand a cgo PC to more than one logical frame,
+	//   and involves juggling state on the C side that we don't manage. Since its
+	//   expansion state is managed on the C side, we can't capture the expansion
+	//   state part way through, and because the output strings are managed on the
+	//   C side, we can't capture the output. Thus, our only choice is to replay a
+	//   whole expansion, potentially discarding some of it.
+	//
+	// Rejected approaches:
+	//
+	// - Do two passes where the first pass just counts and the second pass does
+	//   all the printing. This is undesireable if the stack is corrupted or changing
+	//   because we won't see a partial stack if we panic.
+	//
+	// - Keep a ring buffer of the last N logical frames and use this to print
+	//   the bottom frames once we reach the end of the stack. This works, but
+	//   requires keeping a surprising amount of state on the stack, and we have
+	//   to run the cgo symbolizer twiceâonce to count frames, and a second to
+	//   print themâsince we can't retain the strings it returns.
+	//
+	// Instead, we print the outer frames, and if we reach that limit, we clone
+	// the unwinder, count the remaining frames, and then skip forward and
+	// finish printing from the clone. This makes two passes over the outer part
+	// of the stack, but the single pass over the inner part ensures that's
+	// printed immediately and not revisited. It keeps minimal state on the
+	// stack. And through a combination of skip counts and limits, we can do all
+	// of the steps we need with a single traceback printer implementation.
+	//
+	// We could be more lax about exactly how many frames we print, for example
+	// always stopping and resuming on physical frame boundaries, or at least
+	// cgo expansion boundaries. It's not clear that's much simpler.
 	flags |= unwindPrintErrors
 	var u unwinder
-	u.initAt(pc, sp, lr, gp, flags)
-	n := traceback2(&u, false)
-	if n == 0 {
+	tracebackWithRuntime := func(showRuntime bool) int {
+		const maxInt int = 0x7fffffff
 		u.initAt(pc, sp, lr, gp, flags)
-		n = traceback2(&u, true)
+		n, lastN := traceback2(&u, showRuntime, 0, tracebackInnerFrames)
+		if n < tracebackInnerFrames {
+			// We printed the whole stack.
+			return n
+		}
+		// Clone the unwinder and figure out how many frames are left. This
+		// count will include any logical frames already printed for u's current
+		// physical frame.
+		u2 := u
+		remaining, _ := traceback2(&u, showRuntime, maxInt, 0)
+		elide := remaining - lastN - tracebackOuterFrames
+		if elide > 0 {
+			print("...", elide, " frames elided...\n")
+			traceback2(&u2, showRuntime, lastN+elide, tracebackOuterFrames)
+		} else if elide <= 0 {
+			// There are tracebackOuterFrames or fewer frames left to print.
+			// Just print the rest of the stack.
+			traceback2(&u2, showRuntime, lastN, tracebackOuterFrames)
+		}
+		return n
 	}
-	if n == _TracebackMaxFrames {
-		print("...additional frames elided...\n")
+	// By default, omits runtime frames. If that means we print nothing at all,
+	// repeat forcing all frames printed.
+	if tracebackWithRuntime(false) == 0 {
+		tracebackWithRuntime(true)
 	}
 	printcreatedby(gp)
 
@@ -835,15 +908,38 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) {
 	}
 }
 
-func traceback2(u *unwinder, showRuntime bool) int {
+// traceback2 prints a stack trace starting at u. It skips the first "skip"
+// logical frames, after which it prints at most "max" logical frames. It
+// returns n, which is the number of logical frames skipped and printed, and
+// lastN, which is the number of logical frames skipped or printed just in the
+// phyiscal frame that u references.
+func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) {
+	// commitFrame commits to a logical frame and returns whether this frame
+	// should be printed and whether iteration should stop.
+	commitFrame := func() (pr, stop bool) {
+		if skip == 0 && max == 0 {
+			// Stop
+			return false, true
+		}
+		n++
+		lastN++
+		if skip > 0 {
+			// Skip
+			skip--
+			return false, false
+		}
+		// Print
+		max--
+		return true, false
+	}
+
 	gp := u.g.ptr()
 	level, _, _ := gotraceback()
-	n := 0
-	const max = _TracebackMaxFrames
 	var cgoBuf [32]uintptr
-	for ; n < max && u.valid(); u.next() {
+	for ; u.valid(); u.next() {
+		lastN = 0
 		f := u.frame.fn
-		for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); n < max && uf.valid(); uf = iu.next(uf) {
+		for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); uf.valid(); uf = iu.next(uf) {
 			sf := iu.srcFunc(uf)
 			callee := u.calleeFuncID
 			u.calleeFuncID = sf.funcID
@@ -851,6 +947,12 @@ func traceback2(u *unwinder, showRuntime bool) int {
 				continue
 			}
 
+			if pr, stop := commitFrame(); stop {
+				return
+			} else if !pr {
+				continue
+			}
+
 			name := sf.name()
 			file, line := iu.fileLine(uf)
 			if name == "runtime.gopanic" {
@@ -878,34 +980,39 @@ func traceback2(u *unwinder, showRuntime bool) int {
 				}
 			}
 			print("\n")
-			n++
 		}
 
 		// Print cgo frames.
 		if cgoN := u.cgoCallers(cgoBuf[:]); cgoN > 0 {
 			var arg cgoSymbolizerArg
 			anySymbolized := false
+			stop := false
 			for _, pc := range cgoBuf[:cgoN] {
-				if n >= max {
-					break
-				}
 				if cgoSymbolizer == nil {
-					print("non-Go function at pc=", hex(pc), "\n")
+					if pr, stop := commitFrame(); stop {
+						break
+					} else if pr {
+						print("non-Go function at pc=", hex(pc), "\n")
+					}
 				} else {
-					c := printOneCgoTraceback(pc, max-n, &arg)
-					n += c - 1 // +1 a few lines down
+					stop = printOneCgoTraceback(pc, commitFrame, &arg)
 					anySymbolized = true
+					if stop {
+						break
+					}
 				}
-				n++
 			}
 			if anySymbolized {
 				// Free symbolization state.
 				arg.pc = 0
 				callCgoSymbolizer(&arg)
 			}
+			if stop {
+				return
+			}
 		}
 	}
-	return n
+	return n, 0
 }
 
 // printAncestorTraceback prints the traceback of the given ancestor.
@@ -918,7 +1025,7 @@ func printAncestorTraceback(ancestor ancestorInfo) {
 			printAncestorTracebackFuncInfo(f, pc)
 		}
 	}
-	if len(ancestor.pcs) == _TracebackMaxFrames {
+	if len(ancestor.pcs) == tracebackInnerFrames {
 		print("...additional frames elided...\n")
 	}
 	// Show what created goroutine, except main goroutine (goid 1).
@@ -1405,12 +1512,13 @@ func printCgoTraceback(callers *cgoCallers) {
 		return
 	}
 
+	commitFrame := func() (pr, stop bool) { return true, false }
 	var arg cgoSymbolizerArg
 	for _, c := range callers {
 		if c == 0 {
 			break
 		}
-		printOneCgoTraceback(c, 0x7fffffff, &arg)
+		printOneCgoTraceback(c, commitFrame, &arg)
 	}
 	arg.pc = 0
 	callCgoSymbolizer(&arg)
@@ -1418,11 +1526,16 @@ func printCgoTraceback(callers *cgoCallers) {
 
 // printOneCgoTraceback prints the traceback of a single cgo caller.
 // This can print more than one line because of inlining.
-// Returns the number of frames printed.
-func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int {
-	c := 0
+// It returns the "stop" result of commitFrame.
+func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *cgoSymbolizerArg) bool {
 	arg.pc = pc
-	for c <= max {
+	for {
+		if pr, stop := commitFrame(); stop {
+			return true
+		} else if !pr {
+			continue
+		}
+
 		callCgoSymbolizer(arg)
 		if arg.funcName != nil {
 			// Note that we don't print any argument
@@ -1437,12 +1550,10 @@ func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int {
 			print(gostringnocopy(arg.file), ":", arg.lineno, " ")
 		}
 		print("pc=", hex(pc), "\n")
-		c++
 		if arg.more == 0 {
-			break
+			return false
 		}
 	}
-	return c
 }
 
 // callCgoSymbolizer calls the cgoSymbolizer function.
diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go
index a47ddb61c5..4dd1d4bae9 100644
--- a/src/runtime/traceback_test.go
+++ b/src/runtime/traceback_test.go
@@ -9,8 +9,10 @@ import (
 	"fmt"
 	"internal/abi"
 	"internal/testenv"
+	"regexp"
 	"runtime"
 	"runtime/debug"
+	"strconv"
 	"strings"
 	"sync"
 	"testing"
@@ -150,6 +152,113 @@ func ttiExcluded3() *ttiResult {
 
 var testTracebackArgsBuf [1000]byte
 
+func TestTracebackElision(t *testing.T) {
+	// Test printing exactly the maximum number of frames to make sure we don't
+	// print any "elided" message, eliding exactly 1 so we have to pick back up
+	// in the paused physical frame, and eliding 10 so we have to advance the
+	// physical frame forward.
+	for _, elided := range []int{0, 1, 10} {
+		t.Run(fmt.Sprintf("elided=%d", elided), func(t *testing.T) {
+			n := elided + runtime.TracebackInnerFrames + runtime.TracebackOuterFrames
+
+			// Start a new goroutine so we have control over the whole stack.
+			stackChan := make(chan string)
+			go tteStack(n, stackChan)
+			stack := <-stackChan
+			tb := parseTraceback1(t, stack)
+
+			// Check the traceback.
+			i := 0
+			for i < n {
+				if len(tb.frames) == 0 {
+					t.Errorf("traceback ended early")
+					break
+				}
+				fr := tb.frames[0]
+				if i == runtime.TracebackInnerFrames && elided > 0 {
+					// This should be an "elided" frame.
+					if fr.elided != elided {
+						t.Errorf("want %d frames elided", elided)
+						break
+					}
+					i += fr.elided
+				} else {
+					want := fmt.Sprintf("runtime_test.tte%d", (i+1)%5)
+					if i == 0 {
+						want = "runtime/debug.Stack"
+					} else if i == n-1 {
+						want = "runtime_test.tteStack"
+					}
+					if fr.funcName != want {
+						t.Errorf("want %s, got %s", want, fr.funcName)
+						break
+					}
+					i++
+				}
+				tb.frames = tb.frames[1:]
+			}
+			if !t.Failed() && len(tb.frames) > 0 {
+				t.Errorf("got %d more frames than expected", len(tb.frames))
+			}
+			if t.Failed() {
+				t.Logf("traceback diverged at frame %d", i)
+				off := len(stack)
+				if len(tb.frames) > 0 {
+					off = tb.frames[0].off
+				}
+				t.Logf("traceback before error:\n%s", stack[:off])
+				t.Logf("traceback after error:\n%s", stack[off:])
+			}
+		})
+	}
+}
+
+// tteStack creates a stack of n logical frames and sends the traceback to
+// stack. It cycles through 5 logical frames per physical frame to make it
+// unlikely that any part of the traceback will end on a physical boundary.
+func tteStack(n int, stack chan<- string) {
+	n-- // Account for this frame
+	// This is basically a Duff's device for starting the inline stack in the
+	// right place so we wind up at tteN when n%5=N.
+	switch n % 5 {
+	case 0:
+		stack <- tte0(n)
+	case 1:
+		stack <- tte1(n)
+	case 2:
+		stack <- tte2(n)
+	case 3:
+		stack <- tte3(n)
+	case 4:
+		stack <- tte4(n)
+	default:
+		panic("unreachable")
+	}
+}
+func tte0(n int) string {
+	return tte4(n - 1)
+}
+func tte1(n int) string {
+	return tte0(n - 1)
+}
+func tte2(n int) string {
+	// tte2 opens n%5 == 2 frames. It's also the base case of the recursion,
+	// since we can open no fewer than two frames to call debug.Stack().
+	if n < 2 {
+		panic("bad n")
+	}
+	if n == 2 {
+		return string(debug.Stack())
+	}
+	return tte1(n - 1)
+}
+func tte3(n int) string {
+	return tte2(n - 1)
+}
+func tte4(n int) string {
+	return tte3(n - 1)
+}
+
 func TestTracebackArgs(t *testing.T) {
 	if *flagQuick {
 		t.Skip("-quick")
@@ -586,37 +695,50 @@ type tbFrame struct {
 	funcName string
 	args     string
 	inlined  bool
+
+	// elided is set to the number of frames elided, and the other fields are
+	// set to the zero value.
+	elided int
+
+	off int // byte offset in the traceback text of this frame
 }
 
 // parseTraceback parses a printed traceback to make it easier for tests to
 // check the result.
 func parseTraceback(t *testing.T, tb string) []*traceback {
-	lines := strings.Split(tb, "\n")
-	nLines := len(lines)
+	//lines := strings.Split(tb, "\n")
+	//nLines := len(lines)
+	off := 0
+	lineNo := 0
 	fatal := func(f string, args ...any) {
-		lineNo := nLines - len(lines) + 1
 		msg := fmt.Sprintf(f, args...)
 		t.Fatalf("%s (line %d):\n%s", msg, lineNo, tb)
 	}
 	parseFrame := func(funcName, args string) *tbFrame {
 		// Consume file/line/etc
-		if len(lines) == 0 || !strings.HasPrefix(lines[0], "\t") {
+		if !strings.HasPrefix(tb, "\t") {
 			fatal("missing source line")
 		}
-		lines = lines[1:]
+		_, tb, _ = strings.Cut(tb, "\n")
+		lineNo++
 		inlined := args == "..."
-		return &tbFrame{funcName, args, inlined}
+		return &tbFrame{funcName: funcName, args: args, inlined: inlined, off: off}
 	}
+	var elidedRe = regexp.MustCompile(`^\.\.\.([0-9]+) frames elided\.\.\.$`)
 	var tbs []*traceback
 	var cur *traceback
-	for len(lines) > 0 {
-		line := lines[0]
-		lines = lines[1:]
+	tbLen := len(tb)
+	for len(tb) > 0 {
+		var line string
+		off = tbLen - len(tb)
+		line, tb, _ = strings.Cut(tb, "\n")
+		lineNo++
 		switch {
 		case strings.HasPrefix(line, "goroutine "):
 			cur = &traceback{}
 			tbs = append(tbs, cur)
 		case line == "":
+			// Separator between goroutines
 			cur = nil
 		case line[0] == '\t':
 			fatal("unexpected indent")
@@ -631,6 +753,12 @@ func parseTraceback(t *testing.T, tb string) []*traceback {
 			}
 			frame := parseFrame(funcName, args)
 			cur.frames = append(cur.frames, frame)
+		case elidedRe.MatchString(line):
+			// "...N frames elided..."
+			nStr := elidedRe.FindStringSubmatch(line)
+			n, _ := strconv.Atoi(nStr[1])
+			frame := &tbFrame{elided: n}
+			cur.frames = append(cur.frames, frame)
 		}
 	}
 	return tbs
-- 
2.50.0