"testing"
)
+func canGenerateCore(t *testing.T) bool {
+ // Ensure there is enough RLIMIT_CORE available to generate a full core.
+ var lim syscall.Rlimit
+ err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
+ if err != nil {
+ t.Fatalf("error getting rlimit: %v", err)
+ }
+ // Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
+ // Most systems allow infinity.
+ const minRlimitCore = 100 << 20 // 100 MB
+ if lim.Max < minRlimitCore {
+ t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
+ }
+
+ // Make sure core pattern will send core to the current directory.
+ b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
+ if err != nil {
+ t.Fatalf("error reading core_pattern: %v", err)
+ }
+ if string(b) != "core\n" {
+ t.Skipf("Unexpected core pattern %q", string(b))
+ }
+
+ coreUsesPID := false
+ b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
+ if err == nil {
+ switch string(bytes.TrimSpace(b)) {
+ case "0":
+ case "1":
+ coreUsesPID = true
+ default:
+ t.Skipf("unexpected core_uses_pid value %q", string(b))
+ }
+ }
+ return coreUsesPID
+}
+
const coreSignalSource = `
package main
t.Parallel()
checkGdbVersion(t)
- // Ensure there is enough RLIMIT_CORE available to generate a full core.
- var lim syscall.Rlimit
- err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
- if err != nil {
- t.Fatalf("error getting rlimit: %v", err)
- }
- // Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
- // Most systems allow infinity.
- const minRlimitCore = 100 << 20 // 100 MB
- if lim.Max < minRlimitCore {
- t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
- }
-
- // Make sure core pattern will send core to the current directory.
- b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
- if err != nil {
- t.Fatalf("error reading core_pattern: %v", err)
- }
- if string(b) != "core\n" {
- t.Skipf("Unexpected core pattern %q", string(b))
- }
-
- coreUsesPID := false
- b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
- if err == nil {
- switch string(bytes.TrimSpace(b)) {
- case "0":
- case "1":
- coreUsesPID = true
- default:
- t.Skipf("unexpected core_uses_pid value %q", string(b))
- }
- }
-
- dir := t.TempDir()
+ coreUsesPID := canGenerateCore(t)
// Build the source code.
+ dir := t.TempDir()
src := filepath.Join(dir, "main.go")
- err = os.WriteFile(src, []byte(coreSignalSource), 0644)
+ err := os.WriteFile(src, []byte(coreSignalSource), 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
t.Fatalf("could not find runtime symbol in backtrace after signal handler:\n%s", rest)
}
}
+
+const coreCrashThreadSource = `
+package main
+
+/*
+#cgo CFLAGS: -g -O0
+#include <stdio.h>
+#include <stddef.h>
+void trigger_crash()
+{
+ int* ptr = NULL;
+ *ptr = 1024;
+}
+*/
+import "C"
+import (
+ "flag"
+ "fmt"
+ "os"
+ "runtime/debug"
+ "syscall"
+)
+
+func enableCore() {
+ debug.SetTraceback("crash")
+
+ var lim syscall.Rlimit
+ err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
+ if err != nil {
+ panic(fmt.Sprintf("error getting rlimit: %v", err))
+ }
+ lim.Cur = lim.Max
+ fmt.Fprintf(os.Stderr, "Setting RLIMIT_CORE = %+#v\n", lim)
+ err = syscall.Setrlimit(syscall.RLIMIT_CORE, &lim)
+ if err != nil {
+ panic(fmt.Sprintf("error setting rlimit: %v", err))
+ }
+}
+
+func main() {
+ flag.Parse()
+
+ enableCore()
+
+ C.trigger_crash()
+}
+`
+
+// TestGdbCoreCrashThreadBacktrace tests that runtime could let the fault thread to crash process
+// and make fault thread as number one thread while gdb in a core file
+func TestGdbCoreCrashThreadBacktrace(t *testing.T) {
+ if runtime.GOOS != "linux" {
+ // N.B. This test isn't fundamentally Linux-only, but it needs
+ // to know how to enable/find core files on each OS.
+ t.Skip("Test only supported on Linux")
+ }
+ if runtime.GOARCH != "386" && runtime.GOARCH != "amd64" {
+ // TODO(go.dev/issue/25218): Other architectures use sigreturn
+ // via VDSO, which we somehow don't handle correctly.
+ t.Skip("Backtrace through signal handler only works on 386 and amd64")
+ }
+
+ checkGdbEnvironment(t)
+ t.Parallel()
+ checkGdbVersion(t)
+
+ coreUsesPID := canGenerateCore(t)
+
+ // Build the source code.
+ dir := t.TempDir()
+ src := filepath.Join(dir, "main.go")
+ err := os.WriteFile(src, []byte(coreCrashThreadSource), 0644)
+ if err != nil {
+ t.Fatalf("failed to create file: %v", err)
+ }
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe", "main.go")
+ cmd.Dir = dir
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("building source %v\n%s", err, out)
+ }
+
+ // Start the test binary.
+ cmd = testenv.Command(t, "./a.exe")
+ cmd.Dir = dir
+ var output bytes.Buffer
+ cmd.Stdout = &output // for test logging
+ cmd.Stderr = &output
+
+ if err := cmd.Start(); err != nil {
+ t.Fatalf("error starting test binary: %v", err)
+ }
+
+ pid := cmd.Process.Pid
+
+ err = cmd.Wait()
+ t.Logf("child output:\n%s", output.String())
+ if err == nil {
+ t.Fatalf("Wait succeeded, want SIGABRT")
+ }
+ ee, ok := err.(*exec.ExitError)
+ if !ok {
+ t.Fatalf("Wait err got %T %v, want exec.ExitError", ee, ee)
+ }
+ ws, ok := ee.Sys().(syscall.WaitStatus)
+ if !ok {
+ t.Fatalf("Sys got %T %v, want syscall.WaitStatus", ee.Sys(), ee.Sys())
+ }
+ if ws.Signal() != syscall.SIGABRT {
+ t.Fatalf("Signal got %d want SIGABRT", ws.Signal())
+ }
+ if !ws.CoreDump() {
+ t.Fatalf("CoreDump got %v want true", ws.CoreDump())
+ }
+
+ coreFile := "core"
+ if coreUsesPID {
+ coreFile += fmt.Sprintf(".%d", pid)
+ }
+
+ // Execute gdb commands.
+ args := []string{"-nx", "-batch",
+ "-iex", "add-auto-load-safe-path " + filepath.Join(testenv.GOROOT(t), "src", "runtime"),
+ "-ex", "backtrace",
+ filepath.Join(dir, "a.exe"),
+ filepath.Join(dir, coreFile),
+ }
+ cmd = testenv.Command(t, "gdb", args...)
+
+ got, err := cmd.CombinedOutput()
+ t.Logf("gdb output:\n%s", got)
+ if err != nil {
+ t.Fatalf("gdb exited with error: %v", err)
+ }
+
+ re := regexp.MustCompile(`#.* trigger_crash`)
+ if found := re.Find(got) != nil; !found {
+ t.Fatalf("could not find trigger_crash in backtrace")
+ }
+}
// crashing is the number of m's we have waited for when implementing
// GOTRACEBACK=crash when a signal is received.
-var crashing int32
+var crashing atomic.Int32
// testSigtrap and testSigusr1 are used by the runtime tests. If
// non-nil, it is called on SIGTRAP/SIGUSR1. If it returns true, the
mp.throwing = throwTypeRuntime
mp.caughtsig.set(gp)
- if crashing == 0 {
+ if crashing.Load() == 0 {
startpanic_m()
}
if level > 0 {
goroutineheader(gp)
tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
- if crashing > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
+ if crashing.Load() > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(mp.curg)
traceback(^uintptr(0), ^uintptr(0), 0, mp.curg)
- } else if crashing == 0 {
+ } else if crashing.Load() == 0 {
tracebackothers(gp)
print("\n")
}
}
if docrash {
- crashing++
- if crashing < mcount()-int32(extraMLength.Load()) {
+ isCrashThread := false
+ if crashing.CompareAndSwap(0, 1) {
+ isCrashThread = true
+ } else {
+ crashing.Add(1)
+ }
+ if crashing.Load() < mcount()-int32(extraMLength.Load()) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
- // When the last m receives the SIGQUIT, it will fall through to the call to
- // crash below. Just in case the relaying gets botched, each m involved in
+ // The first m will wait until all ms received the SIGQUIT, then crash/exit.
+ // Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
- // In expected operation, the last m has received the SIGQUIT and run
- // crash/exit and the process is gone, all long before any of the
- // 5-second sleeps have finished.
+ // The faulting m is crashing first so it is the faulting thread in the core dump (see issue #63277):
+ // in expected operation, the first m will wait until the last m has received the SIGQUIT,
+ // and then run crash/exit and the process is gone.
+ // However, if it spends more than 5 seconds to send SIGQUIT to all ms,
+ // any of ms may crash/exit the process after waiting for 5 seconds.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
+ }
+ if isCrashThread {
+ i := 0
+ for (crashing.Load() < mcount()-int32(extraMLength.Load())) && i < 10 {
+ i++
+ usleep(500 * 1000)
+ }
+ } else {
usleep(5 * 1000 * 1000)
}
printDebugLog()