From 7dcd343ed641d3b70c09153d3b041ca3fe83b25e Mon Sep 17 00:00:00 2001
From: Dan Scales <danscales@google.com>
Date: Wed, 9 Oct 2019 12:18:26 -0700
Subject: [PATCH] runtime: ensure that Goexit cannot be aborted by a recursive
 panic/recover

When we do a successful recover of a panic, we resume normal execution by
returning from the frame that had the deferred call that did the recover (after
executing any remaining deferred calls in that frame).

However, suppose we have called runtime.Goexit and there is a panic during one of the
deferred calls run by the Goexit. Further assume that there is a deferred call in
the frame of the Goexit or a parent frame that does a recover. Then the recovery
process will actually resume normal execution above the Goexit frame and hence
abort the Goexit.  We will not terminate the thread as expected, but continue
running in the frame above the Goexit.

To fix this, we explicitly create a _panic object for a Goexit call. We then
change the "abort" behavior for Goexits, but not panics. After a recovery, if the
top-level panic is actually a Goexit that is marked to be aborted, then we return
to the Goexit defer-processing loop, so that the Goexit is not actually aborted.

Actual code changes are just panic.go, runtime2.go, and funcid.go. Adjusted the
test related to the new Goexit behavior (TestRecoverBeforePanicAfterGoexit) and
added several new tests of aborted panics (whose behavior has not changed).

Fixes #29226

Change-Id: Ib13cb0074f5acc2567a28db7ca6912cfc47eecb5
Reviewed-on: https://go-review.googlesource.com/c/go/+/200081
Run-TryBot: Dan Scales <danscales@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/internal/objabi/funcid.go         |  3 +
 src/runtime/callers_test.go               | 62 ++++++++++++++++++
 src/runtime/crash_test.go                 | 43 +++++++-----
 src/runtime/defer_test.go                 | 14 ++--
 src/runtime/panic.go                      | 79 ++++++++++++++++++++---
 src/runtime/runtime2.go                   |  3 +
 src/runtime/testdata/testprog/deadlock.go | 58 +++++++++++++++++
 test/fixedbugs/issue8047b.go              |  4 ++
 8 files changed, 232 insertions(+), 34 deletions(-)
diff --git a/src/cmd/internal/objabi/funcid.go b/src/cmd/internal/objabi/funcid.go
index 2eb91cd2bd..0fda1db178 100644
--- a/src/cmd/internal/objabi/funcid.go
+++ b/src/cmd/internal/objabi/funcid.go
@@ -94,6 +94,9 @@ func GetFuncID(name, file string) FuncID {
 	case "runtime.runOpenDeferFrame":
 		// Don't show in the call stack (used when invoking defer functions)
 		return FuncID_wrapper
+	case "runtime.reflectcallSave":
+		// Don't show in the call stack (used when invoking defer functions)
+		return FuncID_wrapper
 	}
 	if file == "<autogenerated>" {
 		return FuncID_wrapper
diff --git a/src/runtime/callers_test.go b/src/runtime/callers_test.go
index eee1d5c867..3cd1b40ec9 100644
--- a/src/runtime/callers_test.go
+++ b/src/runtime/callers_test.go
@@ -147,6 +147,68 @@ func TestCallersAfterRecovery(t *testing.T) {
 	panic(1)
 }
 
+func TestCallersAbortedPanic(t *testing.T) {
+	want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic.func2", "runtime_test.TestCallersAbortedPanic"}
+
+	defer func() {
+		r := recover()
+		if r != nil {
+			t.Fatalf("should be no panic remaining to recover")
+		}
+	}()
+
+	defer func() {
+		// panic2 was aborted/replaced by panic1, so when panic2 was
+		// recovered, there is no remaining panic on the stack.
+		pcs := make([]uintptr, 20)
+		pcs = pcs[:runtime.Callers(0, pcs)]
+		testCallersEqual(t, pcs, want)
+	}()
+	defer func() {
+		r := recover()
+		if r != "panic2" {
+			t.Fatalf("got %v, wanted %v", r, "panic2")
+		}
+	}()
+	defer func() {
+		// panic2 aborts/replaces panic1, because it is a recursive panic
+		// that is not recovered within the defer function called by
+		// panic1 panicking sequence
+		panic("panic2")
+	}()
+	panic("panic1")
+}
+
+func TestCallersAbortedPanic2(t *testing.T) {
+	want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic2.func2", "runtime_test.TestCallersAbortedPanic2"}
+	defer func() {
+		r := recover()
+		if r != nil {
+			t.Fatalf("should be no panic remaining to recover")
+		}
+	}()
+	defer func() {
+		pcs := make([]uintptr, 20)
+		pcs = pcs[:runtime.Callers(0, pcs)]
+		testCallersEqual(t, pcs, want)
+	}()
+	func() {
+		defer func() {
+			r := recover()
+			if r != "panic2" {
+				t.Fatalf("got %v, wanted %v", r, "panic2")
+			}
+		}()
+		func() {
+			defer func() {
+				// Again, panic2 aborts/replaces panic1
+				panic("panic2")
+			}()
+			panic("panic1")
+		}()
+	}()
+}
+
 func TestCallersNilPointerPanic(t *testing.T) {
 	// Make sure we don't have any extra frames on the stack (due to
 	// open-coded defer processing)
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index ad1f29b254..0bee734a27 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -284,6 +284,17 @@ func TestRecursivePanic3(t *testing.T) {
 
 }
 
+func TestRecursivePanic4(t *testing.T) {
+	output := runTestProg(t, "testprog", "RecursivePanic4")
+	want := `panic: first panic [recovered]
+	panic: second panic
+`
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
+
+}
+
 func TestGoexitCrash(t *testing.T) {
 	output := runTestProg(t, "testprog", "GoexitExit")
 	want := "no goroutines (main called runtime.Goexit) - deadlock!"
@@ -415,23 +426,21 @@ func TestRecoveredPanicAfterGoexit(t *testing.T) {
 }
 
 func TestRecoverBeforePanicAfterGoexit(t *testing.T) {
-	// 1. defer a function that recovers
-	// 2. defer a function that panics
-	// 3. call goexit
-	// Goexit should run the #2 defer. Its panic
-	// should be caught by the #1 defer, and execution
-	// should resume in the caller. Like the Goexit
-	// never happened!
-	defer func() {
-		r := recover()
-		if r == nil {
-			panic("bad recover")
-		}
-	}()
-	defer func() {
-		panic("hello")
-	}()
-	runtime.Goexit()
+	t.Parallel()
+	output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit")
+	want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
+}
+
+func TestRecoverBeforePanicAfterGoexit2(t *testing.T) {
+	t.Parallel()
+	output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit2")
+	want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
 }
 
 func TestNetpollDeadlock(t *testing.T) {
diff --git a/src/runtime/defer_test.go b/src/runtime/defer_test.go
index f03bdb47d5..51cd4bb9cc 100644
--- a/src/runtime/defer_test.go
+++ b/src/runtime/defer_test.go
@@ -121,18 +121,16 @@ func TestDisappearingDefer(t *testing.T) {
 }
 
 // This tests an extra recursive panic behavior that is only specified in the
-// code.  Suppose a first panic P1 happens and starts processing defer calls.  If
-// a second panic P2 happens while processing defer call D in frame F, then defer
+// code. Suppose a first panic P1 happens and starts processing defer calls. If a
+// second panic P2 happens while processing defer call D in frame F, then defer
 // call processing is restarted (with some potentially new defer calls created by
-// D or its callees).  If the defer processing reaches the started defer call D
+// D or its callees). If the defer processing reaches the started defer call D
 // again in the defer stack, then the original panic P1 is aborted and cannot
-// continue panic processing or be recovered.  If the panic P2 does a recover at
-// some point, it will naturally the original panic P1 from the stack, since the
-// original panic had to be in frame F or a descendant of F.
+// continue panic processing or be recovered. If the panic P2 does a recover at
+// some point, it will naturally remove the original panic P1 from the stack
+// (since the original panic had to be in frame F or a descendant of F).
 func TestAbortedPanic(t *testing.T) {
 	defer func() {
-		// The first panic should have been "aborted", so there is
-		// no other panic to recover
 		r := recover()
 		if r != nil {
 			t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index bdfe117e45..31bf31110f 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -577,8 +577,15 @@ func Goexit() {
 	// This code is similar to gopanic, see that implementation
 	// for detailed comments.
 	gp := getg()
-	addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
 
+	// Create a panic object for Goexit, so we can recognize when it might be
+	// bypassed by a recover().
+	var p _panic
+	p.goexit = true
+	p.link = gp._panic
+	gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+
+	addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
 	for {
 		d := gp._defer
 		if d == nil {
@@ -597,6 +604,7 @@ func Goexit() {
 			}
 		}
 		d.started = true
+		d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
 		if d.openDefer {
 			done := runOpenDeferFrame(gp, d)
 			if !done {
@@ -605,9 +613,29 @@ func Goexit() {
 				// defer that can be recovered.
 				throw("unfinished open-coded defers in Goexit")
 			}
-			addOneOpenDeferFrame(gp, 0, nil)
+			if p.aborted {
+				// Since our current defer caused a panic and may
+				// have been already freed, just restart scanning
+				// for open-coded defers from this frame again.
+				addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
+			} else {
+				addOneOpenDeferFrame(gp, 0, nil)
+			}
 		} else {
-			reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+
+			// Save the pc/sp in reflectcallSave(), so we can "recover" back to this
+			// loop if necessary.
+			reflectcallSave(&p, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz))
+		}
+		if p.aborted {
+			// We had a recursive panic in the defer d we started, and
+			// then did a recover in a defer that was further down the
+			// defer chain than d. In the case of an outstanding Goexit,
+			// we force the recover to return back to this loop. d will
+			// have already been freed if completed, so just continue
+			// immediately to the next defer on the chain.
+			p.aborted = false
+			continue
 		}
 		if gp._defer != d {
 			throw("bad defer entry in Goexit")
@@ -645,7 +673,12 @@ func preprintpanics(p *_panic) {
 func printpanics(p *_panic) {
 	if p.link != nil {
 		printpanics(p.link)
-		print("\t")
+		if !p.link.goexit {
+			print("\t")
+		}
+	}
+	if p.goexit {
+		return
 	}
 	print("panic: ")
 	printany(p.arg)
@@ -810,10 +843,11 @@ func runOpenDeferFrame(gp *g, d *_defer) bool {
 		}
 		deferBits = deferBits &^ (1 << i)
 		*(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset))) = deferBits
-		if d._panic != nil {
-			d._panic.argp = unsafe.Pointer(getargp(0))
+		p := d._panic
+		reflectcallSave(p, unsafe.Pointer(closure), deferArgs, argWidth)
+		if p != nil && p.aborted {
+			break
 		}
-		reflectcall(nil, unsafe.Pointer(closure), deferArgs, argWidth, argWidth)
 		d.fn = nil
 		// These args are just a copy, so can be cleared immediately
 		memclrNoHeapPointers(deferArgs, uintptr(argWidth))
@@ -826,6 +860,23 @@ func runOpenDeferFrame(gp *g, d *_defer) bool {
 	return done
 }
 
+// reflectcallSave calls reflectcall after saving the caller's pc and sp in the
+// panic record. This allows the runtime to return to the Goexit defer processing
+// loop, in the unusual case where the Goexit may be bypassed by a successful
+// recover.
+func reflectcallSave(p *_panic, fn, arg unsafe.Pointer, argsize uint32) {
+	if p != nil {
+		p.argp = unsafe.Pointer(getargp(0))
+		p.pc = getcallerpc()
+		p.sp = unsafe.Pointer(getcallersp())
+	}
+	reflectcall(nil, fn, arg, argsize, argsize)
+	if p != nil {
+		p.pc = 0
+		p.sp = unsafe.Pointer(nil)
+	}
+}
+
 // The implementation of the predeclared function panic.
 func gopanic(e interface{}) {
 	gp := getg()
@@ -876,7 +927,8 @@ func gopanic(e interface{}) {
 		}
 
 		// If defer was started by earlier panic or Goexit (and, since we're back here, that triggered a new panic),
-		// take defer off list. The earlier panic or Goexit will not continue running.
+		// take defer off list. An earlier panic will not continue running, but we will make sure below that an
+		// earlier Goexit does continue running.
 		if d.started {
 			if d._panic != nil {
 				d._panic.aborted = true
@@ -933,6 +985,15 @@ func gopanic(e interface{}) {
 			freedefer(d)
 		}
 		if p.recovered {
+			gp._panic = p.link
+			if gp._panic != nil && gp._panic.goexit && gp._panic.aborted {
+				// A normal recover would bypass/abort the Goexit.  Instead,
+				// we return to the processing loop of the Goexit.
+				gp.sigcode0 = uintptr(gp._panic.sp)
+				gp.sigcode1 = uintptr(gp._panic.pc)
+				mcall(recovery)
+				throw("bypassed recovery failed") // mcall should not return
+			}
 			atomic.Xadd(&runningPanicDefers, -1)
 
 			if done {
@@ -1019,7 +1080,7 @@ func gorecover(argp uintptr) interface{} {
 	// If they match, the caller is the one who can recover.
 	gp := getg()
 	p := gp._panic
-	if p != nil && !p.recovered && argp == uintptr(p.argp) {
+	if p != nil && !p.goexit && !p.recovered && argp == uintptr(p.argp) {
 		p.recovered = true
 		return p.arg
 	}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 4ee075a36a..a5471ff765 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -872,8 +872,11 @@ type _panic struct {
 	argp      unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
 	arg       interface{}    // argument to panic
 	link      *_panic        // link to earlier panic
+	pc        uintptr        // where to return to in runtime if this panic is bypassed
+	sp        unsafe.Pointer // where to return to in runtime if this panic is bypassed
 	recovered bool           // whether this panic is over
 	aborted   bool           // the panic was aborted
+	goexit    bool
 }
 
 // stack traces
diff --git a/src/runtime/testdata/testprog/deadlock.go b/src/runtime/testdata/testprog/deadlock.go
index 9ca0fc344f..105d6a5faa 100644
--- a/src/runtime/testdata/testprog/deadlock.go
+++ b/src/runtime/testdata/testprog/deadlock.go
@@ -24,6 +24,7 @@ func init() {
 	register("RecursivePanic", RecursivePanic)
 	register("RecursivePanic2", RecursivePanic2)
 	register("RecursivePanic3", RecursivePanic3)
+	register("RecursivePanic4", RecursivePanic4)
 	register("GoexitExit", GoexitExit)
 	register("GoNil", GoNil)
 	register("MainGoroutineID", MainGoroutineID)
@@ -31,6 +32,8 @@ func init() {
 	register("GoexitInPanic", GoexitInPanic)
 	register("PanicAfterGoexit", PanicAfterGoexit)
 	register("RecoveredPanicAfterGoexit", RecoveredPanicAfterGoexit)
+	register("RecoverBeforePanicAfterGoexit", RecoverBeforePanicAfterGoexit)
+	register("RecoverBeforePanicAfterGoexit2", RecoverBeforePanicAfterGoexit2)
 	register("PanicTraceback", PanicTraceback)
 	register("GoschedInPanic", GoschedInPanic)
 	register("SyscallInPanic", SyscallInPanic)
@@ -146,6 +149,17 @@ func RecursivePanic3() {
 	panic("first panic")
 }
 
+// Test case where a single defer recovers one panic but starts another panic. If
+// the second panic is never recovered, then the recovered first panic will still
+// appear on the panic stack (labeled '[recovered]') and the runtime stack.
+func RecursivePanic4() {
+	defer func() {
+		recover()
+		panic("second panic")
+	}()
+	panic("first panic")
+}
+
 func GoexitExit() {
 	println("t1")
 	go func() {
@@ -237,6 +251,50 @@ func RecoveredPanicAfterGoexit() {
 	runtime.Goexit()
 }
 
+func RecoverBeforePanicAfterGoexit() {
+	// 1. defer a function that recovers
+	// 2. defer a function that panics
+	// 3. call goexit
+	// Goexit runs the #2 defer. Its panic
+	// is caught by the #1 defer.  For Goexit, we explicitly
+	// resume execution in the Goexit loop, instead of resuming
+	// execution in the caller (which would make the Goexit disappear!)
+	defer func() {
+		r := recover()
+		if r == nil {
+			panic("bad recover")
+		}
+	}()
+	defer func() {
+		panic("hello")
+	}()
+	runtime.Goexit()
+}
+
+func RecoverBeforePanicAfterGoexit2() {
+	for i := 0; i < 2; i++ {
+		defer func() {
+		}()
+	}
+	// 1. defer a function that recovers
+	// 2. defer a function that panics
+	// 3. call goexit
+	// Goexit runs the #2 defer. Its panic
+	// is caught by the #1 defer.  For Goexit, we explicitly
+	// resume execution in the Goexit loop, instead of resuming
+	// execution in the caller (which would make the Goexit disappear!)
+	defer func() {
+		r := recover()
+		if r == nil {
+			panic("bad recover")
+		}
+	}()
+	defer func() {
+		panic("hello")
+	}()
+	runtime.Goexit()
+}
+
 func PanicTraceback() {
 	pt1()
 }
diff --git a/test/fixedbugs/issue8047b.go b/test/fixedbugs/issue8047b.go
index df902a5243..5eaf9c5bac 100644
--- a/test/fixedbugs/issue8047b.go
+++ b/test/fixedbugs/issue8047b.go
@@ -10,6 +10,10 @@ package main
 
 func main() {
 	defer func() {
+		// This recover recovers the panic caused by the nil defer func
+		// g(). The original panic(1) was already aborted/replaced by this
+		// new panic, so when this recover is done, the program completes
+		// normally.
 		recover()
 	}()
 	f()
-- 
2.52.0