From 643d816c8b4348850a8a2a622d73256beea104cd Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Mon, 5 Feb 2024 16:38:41 -0500 Subject: [PATCH] runtime: traceback: include pc=0x%x for inline frames Crash monitoring tools may parse the PC values and feed them to CallersFrames, which does not run the inline unwinder, since Callers already did so. So, the GOTRACEBACK=system output must also include PC values even for inlined frames. (The actual values are just marker NOP instructions, but that isn't important.) This CL also includes a test that the PC values can be parsed out of the crash report and fed to CallersFrames to yield a sensible result. (The logic is a distillation of the x/telemetry crashmonitor.) The previously printed PCs were in fact slightly wrong for frames containing inlined calls: instead of the virtual CALL instruction (a NOP) to the first inlined call, it would display the PC of the CALL in the innermost inlined function. Change-Id: I64a06771fc191ba16c1383b8139b714f4f299703 Reviewed-on: https://go-review.googlesource.com/c/go/+/561635 LUCI-TryBot-Result: Go LUCI Auto-Submit: Alan Donovan Reviewed-by: Cherry Mui --- src/runtime/crash_test.go | 13 ++ src/runtime/traceback.go | 24 ++- src/runtime/traceback_system_test.go | 235 +++++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 6 deletions(-) create mode 100644 src/runtime/traceback_system_test.go diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 7fc620ac50..2eddbcaefc 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -10,6 +10,7 @@ import ( "flag" "fmt" "internal/testenv" + "log" "os" "os/exec" "path/filepath" @@ -23,7 +24,19 @@ import ( var toRemove []string +const entrypointVar = "RUNTIME_TEST_ENTRYPOINT" + func TestMain(m *testing.M) { + switch entrypoint := os.Getenv(entrypointVar); entrypoint { + case "crash": + crash() + panic("unreachable") + default: + log.Fatalf("invalid %s: %q", entrypointVar, entrypoint) + case "": + // fall through to normal behavior + } + _, coreErrBefore := os.Stat("core") status := m.Run() diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 1c75c447d2..61027ea89a 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -993,12 +993,24 @@ func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) { } print(")\n") print("\t", file, ":", line) - if !iu.isInlined(uf) { - if u.frame.pc > f.entry() { - print(" +", hex(u.frame.pc-f.entry())) - } - if gp.m != nil && gp.m.throwing >= throwTypeRuntime && gp == gp.m.curg || level >= 2 { - print(" fp=", hex(u.frame.fp), " sp=", hex(u.frame.sp), " pc=", hex(u.frame.pc)) + // The contract between Callers and CallersFrames uses + // return addresses, which are +1 relative to the CALL + // instruction. Follow that convention. + pc := uf.pc + 1 + if !iu.isInlined(uf) && pc > f.entry() { + // Func-relative PCs make no sense for inlined + // frames because there is no actual entry. + print(" +", hex(pc-f.entry())) + } + if gp.m != nil && gp.m.throwing >= throwTypeRuntime && gp == gp.m.curg || level >= 2 { + if !iu.isInlined(uf) { + // The stack information makes no sense for inline frames. + print(" fp=", hex(u.frame.fp), " sp=", hex(u.frame.sp), " pc=", hex(pc)) + } else { + // The PC for an inlined frame is a special marker NOP, + // but crash monitoring tools may still parse the PCs + // and feed them to CallersFrames. + print(" pc=", hex(pc)) } } print("\n") diff --git a/src/runtime/traceback_system_test.go b/src/runtime/traceback_system_test.go new file mode 100644 index 0000000000..6231586e44 --- /dev/null +++ b/src/runtime/traceback_system_test.go @@ -0,0 +1,235 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +// This test of GOTRACEBACK=system has its own file, +// to minimize line-number perturbation. + +import ( + "bytes" + "fmt" + "internal/testenv" + "io" + "os" + "path/filepath" + "reflect" + "runtime" + "runtime/debug" + "strconv" + "strings" + "testing" +) + +// This is the entrypoint of the child process used by +// TestTracebackSystem. It prints a crash report to stdout. +func crash() { + // Ensure that we get pc=0x%x values in the traceback. + debug.SetTraceback("system") + writeSentinel(os.Stdout) + debug.SetCrashOutput(os.Stdout) + + go func() { + // This call is typically inlined. + child() + }() + select {} +} + +func child() { + grandchild() +} + +func grandchild() { + // Write runtime.Caller's view of the stack to stderr, for debugging. + var pcs [16]uintptr + n := runtime.Callers(1, pcs[:]) + io.WriteString(os.Stderr, formatStack(pcs[:n])) + + // Cause the crash report to be written to stdout. + panic("oops") +} + +// TestTracebackSystem tests that the syntax of crash reports produced +// by GOTRACEBACK=system (see traceback2) contains a complete, +// parseable list of program counters for the running goroutine that +// can be parsed and fed to runtime.CallersFrames to obtain accurate +// information about the logical call stack, even in the presence of +// inlining. +// +// The test is a distillation of the crash monitor in +// golang.org/x/telemetry/crashmonitor. +func TestTracebackSystem(t *testing.T) { + testenv.MustHaveExec(t) + + // Fork+exec the crashing process. + exe, err := os.Executable() + if err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, exe) + cmd.Env = append(cmd.Environ(), entrypointVar+"=crash") + cmd.Stdout = new(strings.Builder) + // cmd.Stderr = os.Stderr // uncomment to debug, e.g. to see runtime.Caller's view + cmd.Run() // expected to crash + crash := cmd.Stdout.(*strings.Builder).String() + + // If the only line is the sentinel, it wasn't a crash. + if strings.Count(crash, "\n") < 2 { + t.Fatalf("child process did not produce a crash report") + } + + // Parse the PCs out of the child's crash report. + pcs, err := parseStackPCs(crash) + if err != nil { + t.Fatal(err) + } + + // Unwind the stack using this executable's symbol table. + got := formatStack(pcs) + want := `redacted.go:0: runtime.gopanic +traceback_system_test.go:51: runtime_test.grandchild: panic("oops") +traceback_system_test.go:41: runtime_test.child: grandchild() +traceback_system_test.go:35: runtime_test.crash.func1: child() +redacted.go:0: runtime.goexit` + if strings.TrimSpace(got) != strings.TrimSpace(want) { + t.Errorf("got:\n%swant:\n%s", got, want) + } +} + +// parseStackPCs parses the parent process's program counters for the +// first running goroutine out of a GOTRACEBACK=system traceback, +// adjusting them so that they are valid for the child process's text +// segment. +// +// This function returns only program counter values, ensuring that +// there is no possibility of strings from the crash report (which may +// contain PII) leaking into the telemetry system. +// +// (Copied from golang.org/x/telemetry/crashmonitor.parseStackPCs.) +func parseStackPCs(crash string) ([]uintptr, error) { + // getPC parses the PC out of a line of the form: + // \tFILE:LINE +0xRELPC sp=... fp=... pc=... + getPC := func(line string) (uint64, error) { + _, pcstr, ok := strings.Cut(line, " pc=") // e.g. pc=0x%x + if !ok { + return 0, fmt.Errorf("no pc= for stack frame: %s", line) + } + return strconv.ParseUint(pcstr, 0, 64) // 0 => allow 0x prefix + } + + var ( + pcs []uintptr + parentSentinel uint64 + childSentinel = sentinel() + on = false // are we in the first running goroutine? + lines = strings.Split(crash, "\n") + ) + for i := 0; i < len(lines); i++ { + line := lines[i] + + // Read sentinel value. + if parentSentinel == 0 && strings.HasPrefix(line, "sentinel ") { + _, err := fmt.Sscanf(line, "sentinel %x", &parentSentinel) + if err != nil { + return nil, fmt.Errorf("can't read sentinel line") + } + continue + } + + // Search for "goroutine GID [STATUS]" + if !on { + if strings.HasPrefix(line, "goroutine ") && + strings.Contains(line, " [running]:") { + on = true + + if parentSentinel == 0 { + return nil, fmt.Errorf("no sentinel value in crash report") + } + } + continue + } + + // A blank line marks end of a goroutine stack. + if line == "" { + break + } + + // Skip the final "created by SYMBOL in goroutine GID" part. + if strings.HasPrefix(line, "created by ") { + break + } + + // Expect a pair of lines: + // SYMBOL(ARGS) + // \tFILE:LINE +0xRELPC sp=0x%x fp=0x%x pc=0x%x + // Note: SYMBOL may contain parens "pkg.(*T).method" + // The RELPC is sometimes missing. + + // Skip the symbol(args) line. + i++ + if i == len(lines) { + break + } + line = lines[i] + + // Parse the PC, and correct for the parent and child's + // different mappings of the text section. + pc, err := getPC(line) + if err != nil { + // Inlined frame, perhaps; skip it. + continue + } + pcs = append(pcs, uintptr(pc-parentSentinel+childSentinel)) + } + return pcs, nil +} + +// The sentinel function returns its address. The difference between +// this value as observed by calls in two different processes of the +// same executable tells us the relative offset of their text segments. +// +// It would be nice if SetCrashOutput took care of this as it's fiddly +// and likely to confuse every user at first. +func sentinel() uint64 { + return uint64(reflect.ValueOf(sentinel).Pointer()) +} + +func writeSentinel(out io.Writer) { + fmt.Fprintf(out, "sentinel %x\n", sentinel()) +} + +// formatStack formats a stack of PC values using the symbol table, +// redacting information that cannot be relied upon in the test. +func formatStack(pcs []uintptr) string { + // When debugging, show file/line/content of files other than this one. + const debug = false + + var buf strings.Builder + i := 0 + frames := runtime.CallersFrames(pcs) + for { + fr, more := frames.Next() + if debug { + fmt.Fprintf(&buf, "pc=%x ", pcs[i]) + i++ + } + if base := filepath.Base(fr.File); base == "traceback_system_test.go" || debug { + content, err := os.ReadFile(fr.File) + if err != nil { + panic(err) + } + lines := bytes.Split(content, []byte("\n")) + fmt.Fprintf(&buf, "%s:%d: %s: %s\n", base, fr.Line, fr.Function, lines[fr.Line-1]) + } else { + // For robustness, don't show file/line for functions from other files. + fmt.Fprintf(&buf, "redacted.go:0: %s\n", fr.Function) + } + + if !more { + break + } + } + return buf.String() +} -- 2.48.1