Cypherpunks repositories - gostls13.git/commitdiff
runtime: implement GOTRACEBACK=crash for linux/386
author: Russ Cox <rsc@golang.org>
Tue, 16 Jun 2015 20:37:48 +0000 (16:37 -0400)
committer: Russ Cox <rsc@golang.org>
Tue, 16 Jun 2015 20:47:47 +0000 (20:47 +0000)
Change-Id: I401ce8d612160a4f4ee617bddca6827fa544763a
Reviewed-on: https://go-review.googlesource.com/11087
Reviewed-by: Austin Clements <austin@google.com>
src/runtime/signal_386.go

index f3c36cb07a9708f7d3e03222a55056cb5c7e5a45..8fb197952ee8f0f2f779bf2560be234bbc932d20 100644 (file)
@@ -24,6 +24,8 @@ func dumpregs(c *sigctxt) {
        print("gs     ", hex(c.gs()), "\n")
 }
 
+var crashing int32
+
 // May run during STW, so write barriers are not allowed.
 //go:nowritebarrier
 func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
@@ -101,7 +103,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 
        _g_.m.throwing = 1
        _g_.m.caughtsig.set(gp)
-       startpanic()
+
+       if crashing == 0 {
+               startpanic()
+       }
 
        if sig < uint32(len(sigtable)) {
                print(sigtable[sig].name, "\n")
@@ -109,7 +114,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
                print("Signal ", sig, "\n")
        }
 
-       print("PC=", hex(c.eip()), "\n")
+       print("PC=", hex(c.eip()), " m=", _g_.m.id, "\n")
        if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
                print("signal arrived during cgo execution\n")
                gp = _g_.m.lockedg
@@ -119,13 +124,62 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
        var docrash bool
        if gotraceback(&docrash) > 0 {
                goroutineheader(gp)
-               tracebacktrap(uintptr(c.eip()), uintptr(c.esp()), 0, gp)
-               tracebackothers(gp)
-               print("\n")
+
+               // On Linux/386, all system calls go through the vdso kernel_vsyscall routine.
+               // Normally we don't see those PCs, but during signals we can.
+               // If we see a PC in the vsyscall area (it moves around, but near the top of memory),
+               // assume we're blocked in the vsyscall routine, which has saved
+               // three words on the stack after the initial call saved the caller PC.
+               // Pop all four words off SP and use the saved PC.
+               // The check of the stack bounds here should suffice to avoid a fault
+               // during the actual PC pop.
+               // If we do load a bogus PC, not much harm done: we weren't going
+               // to get a decent traceback anyway.
+               // TODO(rsc): Make this more precise: we should do more checks on the PC,
+               // and we should find out whether different versions of the vdso page
+               // use different prologues that store different amounts on the stack.
+               pc := uintptr(c.eip())
+               sp := uintptr(c.esp())
+               if GOOS == "linux" && pc >= 0xf4000000 && gp.stack.lo <= sp && sp+16 <= gp.stack.hi {
+                       // Assume in vsyscall page.
+                       sp += 16
+                       pc = *(*uintptr)(unsafe.Pointer(sp - 4))
+                       print("runtime: unwind vdso kernel_vsyscall: pc=", hex(pc), " sp=", hex(sp), "\n")
+               }
+
+               tracebacktrap(pc, sp, 0, gp)
+               if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+                       // tracebackothers on original m skipped this one; trace it now.
+                       goroutineheader(_g_.m.curg)
+                       traceback(^uintptr(0), ^uintptr(0), 0, gp)
+               } else if crashing == 0 {
+                       tracebackothers(gp)
+                       print("\n")
+               }
                dumpregs(c)
        }
 
        if docrash {
+               // TODO(rsc): Implement raiseproc on other systems
+               // and then add to this if condition.
+               if GOOS == "linux" {
+                       crashing++
+                       if crashing < sched.mcount {
+                               // There are other m's that need to dump their stacks.
+                               // Relay SIGQUIT to the next m by sending it to the current process.
+                               // All m's that have already received SIGQUIT have signal masks blocking
+                               // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+                               // When the last m receives the SIGQUIT, it will fall through to the call to
+                               // crash below. Just in case the relaying gets botched, each m involved in
+                               // the relay sleeps for 5 seconds and then does the crash/exit itself.
+                               // In expected operation, the last m has received the SIGQUIT and run
+                               // crash/exit and the process is gone, all long before any of the
+                               // 5-second sleeps have finished.
+                               print("\n-----\n\n")
+                               raiseproc(_SIGQUIT)
+                               usleep(5 * 1000 * 1000)
+                       }
+               }
                crash()
        }