runtime: ensure that Goexit cannot be aborted by a recursive panic/recover

danscales · danscales · commit 7dcd343ed641 · 2019-11-04T16:32:38.000Z
When we do a successful recover of a panic, we resume normal execution by returning from the frame that had the deferred call that did the recover (after executing any remaining deferred calls in that frame). However, suppose we have called runtime.Goexit and there is a panic during one of the deferred calls run by the Goexit. Further assume that there is a deferred call in the frame of the Goexit or a parent frame that does a recover. Then the recovery process will actually resume normal execution above the Goexit frame and hence abort the Goexit. We will not terminate the thread as expected, but continue running in the frame above the Goexit. To fix this, we explicitly create a _panic object for a Goexit call. We then change the "abort" behavior for Goexits, but not panics. After a recovery, if the top-level panic is actually a Goexit that is marked to be aborted, then we return to the Goexit defer-processing loop, so that the Goexit is not actually aborted. Actual code changes are just panic.go, runtime2.go, and funcid.go. Adjusted the test related to the new Goexit behavior (TestRecoverBeforePanicAfterGoexit) and added several new tests of aborted panics (whose behavior has not changed). Fixes #29226 Change-Id: Ib13cb0074f5acc2567a28db7ca6912cfc47eecb5 Reviewed-on: https://go-review.googlesource.com/c/go/+/200081 Run-TryBot: Dan Scales <danscales@google.com> Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/internal/objabi/funcid.go b/src/cmd/internal/objabi/funcid.go
@@ -94,6 +94,9 @@ func GetFuncID(name, file string) FuncID {
 	case "runtime.runOpenDeferFrame":
 		// Don't show in the call stack (used when invoking defer functions)
 		return FuncID_wrapper
+	case "runtime.reflectcallSave":
+		// Don't show in the call stack (used when invoking defer functions)
+		return FuncID_wrapper
 	}
 	if file == "<autogenerated>" {
 		return FuncID_wrapper
diff --git a/src/runtime/callers_test.go b/src/runtime/callers_test.go
@@ -147,6 +147,68 @@ func TestCallersAfterRecovery(t *testing.T) {
 	panic(1)
 }
 
+func TestCallersAbortedPanic(t *testing.T) {
+	want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic.func2", "runtime_test.TestCallersAbortedPanic"}
+
+	defer func() {
+		r := recover()
+		if r != nil {
+			t.Fatalf("should be no panic remaining to recover")
+		}
+	}()
+
+	defer func() {
+		// panic2 was aborted/replaced by panic1, so when panic2 was
+		// recovered, there is no remaining panic on the stack.
+		pcs := make([]uintptr, 20)
+		pcs = pcs[:runtime.Callers(0, pcs)]
+		testCallersEqual(t, pcs, want)
+	}()
+	defer func() {
+		r := recover()
+		if r != "panic2" {
+			t.Fatalf("got %v, wanted %v", r, "panic2")
+		}
+	}()
+	defer func() {
+		// panic2 aborts/replaces panic1, because it is a recursive panic
+		// that is not recovered within the defer function called by
+		// panic1 panicking sequence
+		panic("panic2")
+	}()
+	panic("panic1")
+}
+
+func TestCallersAbortedPanic2(t *testing.T) {
+	want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic2.func2", "runtime_test.TestCallersAbortedPanic2"}
+	defer func() {
+		r := recover()
+		if r != nil {
+			t.Fatalf("should be no panic remaining to recover")
+		}
+	}()
+	defer func() {
+		pcs := make([]uintptr, 20)
+		pcs = pcs[:runtime.Callers(0, pcs)]
+		testCallersEqual(t, pcs, want)
+	}()
+	func() {
+		defer func() {
+			r := recover()
+			if r != "panic2" {
+				t.Fatalf("got %v, wanted %v", r, "panic2")
+			}
+		}()
+		func() {
+			defer func() {
+				// Again, panic2 aborts/replaces panic1
+				panic("panic2")
+			}()
+			panic("panic1")
+		}()
+	}()
+}
+
 func TestCallersNilPointerPanic(t *testing.T) {
 	// Make sure we don't have any extra frames on the stack (due to
 	// open-coded defer processing)
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
@@ -284,6 +284,17 @@ func TestRecursivePanic3(t *testing.T) {
 
 }
 
+func TestRecursivePanic4(t *testing.T) {
+	output := runTestProg(t, "testprog", "RecursivePanic4")
+	want := `panic: first panic [recovered]
+	panic: second panic
+`
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
+
+}
+
 func TestGoexitCrash(t *testing.T) {
 	output := runTestProg(t, "testprog", "GoexitExit")
 	want := "no goroutines (main called runtime.Goexit) - deadlock!"
@@ -415,23 +426,21 @@ func TestRecoveredPanicAfterGoexit(t *testing.T) {
 }
 
 func TestRecoverBeforePanicAfterGoexit(t *testing.T) {
-	// 1. defer a function that recovers
-	// 2. defer a function that panics
-	// 3. call goexit
-	// Goexit should run the #2 defer. Its panic
-	// should be caught by the #1 defer, and execution
-	// should resume in the caller. Like the Goexit
-	// never happened!
-	defer func() {
-		r := recover()
-		if r == nil {
-			panic("bad recover")
-		}
-	}()
-	defer func() {
-		panic("hello")
-	}()
-	runtime.Goexit()
+	t.Parallel()
+	output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit")
+	want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
+}
+
+func TestRecoverBeforePanicAfterGoexit2(t *testing.T) {
+	t.Parallel()
+	output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit2")
+	want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+	if !strings.HasPrefix(output, want) {
+		t.Fatalf("output does not start with %q:\n%s", want, output)
+	}
 }
 
 func TestNetpollDeadlock(t *testing.T) {
diff --git a/src/runtime/defer_test.go b/src/runtime/defer_test.go
@@ -121,18 +121,16 @@ func TestDisappearingDefer(t *testing.T) {
 }
 
 // This tests an extra recursive panic behavior that is only specified in the
-// code.  Suppose a first panic P1 happens and starts processing defer calls.  If
-// a second panic P2 happens while processing defer call D in frame F, then defer
+// code. Suppose a first panic P1 happens and starts processing defer calls. If a
+// second panic P2 happens while processing defer call D in frame F, then defer
 // call processing is restarted (with some potentially new defer calls created by
-// D or its callees).  If the defer processing reaches the started defer call D
+// D or its callees). If the defer processing reaches the started defer call D
 // again in the defer stack, then the original panic P1 is aborted and cannot
-// continue panic processing or be recovered.  If the panic P2 does a recover at
-// some point, it will naturally the original panic P1 from the stack, since the
-// original panic had to be in frame F or a descendant of F.
+// continue panic processing or be recovered. If the panic P2 does a recover at
+// some point, it will naturally remove the original panic P1 from the stack
+// (since the original panic had to be in frame F or a descendant of F).
 func TestAbortedPanic(t *testing.T) {
 	defer func() {
-		// The first panic should have been "aborted", so there is
-		// no other panic to recover
 		r := recover()
 		if r != nil {
 			t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
@@ -577,8 +577,15 @@ func Goexit() {
 	// This code is similar to gopanic, see that implementation
 	// for detailed comments.
 	gp := getg()
-	addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
 
+	// Create a panic object for Goexit, so we can recognize when it might be
+	// bypassed by a recover().
+	var p _panic
+	p.goexit = true
+	p.link = gp._panic
+	gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+
+	addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
 	for {
 		d := gp._defer
 		if d == nil {
@@ -597,6 +604,7 @@ func Goexit() {
 			}
 		}
 		d.started = true
+		d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
 		if d.openDefer {
 			done := runOpenDeferFrame(gp, d)
 			if !done {
@@ -605,9 +613,29 @@ func Goexit() {
 				// defer that can be recovered.
 				throw("unfinished open-coded defers in Goexit")
 			}
-			addOneOpenDeferFrame(gp, 0, nil)
+			if p.aborted {
+				// Since our current defer caused a panic and may
+				// have been already freed, just restart scanning
+				// for open-coded defers from this frame again.
+				addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
+			} else {
+				addOneOpenDeferFrame(gp, 0, nil)
+			}
 		} else {
-			reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+
+			// Save the pc/sp in reflectcallSave(), so we can "recover" back to this
+			// loop if necessary.
+			reflectcallSave(&p, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz))
+		}
+		if p.aborted {
+			// We had a recursive panic in the defer d we started, and
+			// then did a recover in a defer that was further down the
+			// defer chain than d. In the case of an outstanding Goexit,
+			// we force the recover to return back to this loop. d will
+			// have already been freed if completed, so just continue
+			// immediately to the next defer on the chain.
+			p.aborted = false
+			continue
 		}
 		if gp._defer != d {
 			throw("bad defer entry in Goexit")
@@ -645,7 +673,12 @@ func preprintpanics(p *_panic) {
 func printpanics(p *_panic) {
 	if p.link != nil {
 		printpanics(p.link)
-		print("\t")
+		if !p.link.goexit {
+			print("\t")
+		}
+	}
+	if p.goexit {
+		return
 	}
 	print("panic: ")
 	printany(p.arg)
@@ -810,10 +843,11 @@ func runOpenDeferFrame(gp *g, d *_defer) bool {
 		}
 		deferBits = deferBits &^ (1 << i)
 		*(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset))) = deferBits
-		if d._panic != nil {
-			d._panic.argp = unsafe.Pointer(getargp(0))
+		p := d._panic
+		reflectcallSave(p, unsafe.Pointer(closure), deferArgs, argWidth)
+		if p != nil && p.aborted {
+			break
 		}
-		reflectcall(nil, unsafe.Pointer(closure), deferArgs, argWidth, argWidth)
 		d.fn = nil
 		// These args are just a copy, so can be cleared immediately
 		memclrNoHeapPointers(deferArgs, uintptr(argWidth))
@@ -826,6 +860,23 @@ func runOpenDeferFrame(gp *g, d *_defer) bool {
 	return done
 }
 
+// reflectcallSave calls reflectcall after saving the caller's pc and sp in the
+// panic record. This allows the runtime to return to the Goexit defer processing
+// loop, in the unusual case where the Goexit may be bypassed by a successful
+// recover.
+func reflectcallSave(p *_panic, fn, arg unsafe.Pointer, argsize uint32) {
+	if p != nil {
+		p.argp = unsafe.Pointer(getargp(0))
+		p.pc = getcallerpc()
+		p.sp = unsafe.Pointer(getcallersp())
+	}
+	reflectcall(nil, fn, arg, argsize, argsize)
+	if p != nil {
+		p.pc = 0
+		p.sp = unsafe.Pointer(nil)
+	}
+}
+
 // The implementation of the predeclared function panic.
 func gopanic(e interface{}) {
 	gp := getg()
@@ -876,7 +927,8 @@ func gopanic(e interface{}) {
 		}
 
 		// If defer was started by earlier panic or Goexit (and, since we're back here, that triggered a new panic),
-		// take defer off list. The earlier panic or Goexit will not continue running.
+		// take defer off list. An earlier panic will not continue running, but we will make sure below that an
+		// earlier Goexit does continue running.
 		if d.started {
 			if d._panic != nil {
 				d._panic.aborted = true
@@ -933,6 +985,15 @@ func gopanic(e interface{}) {
 			freedefer(d)
 		}
 		if p.recovered {
+			gp._panic = p.link
+			if gp._panic != nil && gp._panic.goexit && gp._panic.aborted {
+				// A normal recover would bypass/abort the Goexit.  Instead,
+				// we return to the processing loop of the Goexit.
+				gp.sigcode0 = uintptr(gp._panic.sp)
+				gp.sigcode1 = uintptr(gp._panic.pc)
+				mcall(recovery)
+				throw("bypassed recovery failed") // mcall should not return
+			}
 			atomic.Xadd(&runningPanicDefers, -1)
 
 			if done {
@@ -1019,7 +1080,7 @@ func gorecover(argp uintptr) interface{} {
 	// If they match, the caller is the one who can recover.
 	gp := getg()
 	p := gp._panic
-	if p != nil && !p.recovered && argp == uintptr(p.argp) {
+	if p != nil && !p.goexit && !p.recovered && argp == uintptr(p.argp) {
 		p.recovered = true
 		return p.arg
 	}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
@@ -872,8 +872,11 @@ type _panic struct {
 	argp      unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
 	arg       interface{}    // argument to panic
 	link      *_panic        // link to earlier panic
+	pc        uintptr        // where to return to in runtime if this panic is bypassed
+	sp        unsafe.Pointer // where to return to in runtime if this panic is bypassed
 	recovered bool           // whether this panic is over
 	aborted   bool           // the panic was aborted
+	goexit    bool
 }
 
 // stack traces
diff --git a/src/runtime/testdata/testprog/deadlock.go b/src/runtime/testdata/testprog/deadlock.go
diff --git a/test/fixedbugs/issue8047b.go b/test/fixedbugs/issue8047b.go