From 36a432f27bbcc65fb03845ebe5e4a3db6f4cc189 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Wed, 3 Apr 2019 14:00:12 -0400
Subject: [PATCH] runtime: make copystack/sudog synchronization more explicit

When we copy a stack of a goroutine blocked in a channel operation, we
have to be very careful because other goroutines may be writing to that
goroutine's stack. To handle this, stack copying acquires the locks for
the channels a goroutine is waiting on.

One complication is that stack growth may happen while a goroutine
holds these locks, in which case stack copying must *not* acquire these
locks because that would self-deadlock.

Currently, stack growth never acquires these locks because stack growth
only happens when a goroutine is running, which means it's either not
blocking on a channel or it's holding the channel locks already. Stack
shrinking always acquires these locks because shrinking happens
asynchronously, so the goroutine is never running, so there are either
no locks or they've been released by the goroutine.

However, we're about to change when stack shrinking can happen, which
is going to break the current rules. Rather than find a new way to
derive whether to acquire these locks or not, this CL simply adds a
flag to the g struct that indicates that stack copying should acquire
channel locks. This flag is set while the goroutine is blocked on a
channel op.

For #10958, #24543.

Change-Id: Ia2ac8831b1bfda98d39bb30285e144c4f7eaf9ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/172982
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: Michael Knyszek
---
 src/runtime/chan.go        | 14 ++++++++--
 src/runtime/runtime2.go    | 54 +++++++++++++++++++++-----------
 src/runtime/select.go      |  4 +++
 src/runtime/sizeof_test.go |  2 +-
 src/runtime/stack.go       | 30 ++++++++-------------
 5 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 93afe90dad..677af99eac 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -249,7 +249,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
 	gp.waiting = mysg
 	gp.param = nil
 	c.sendq.enqueue(mysg)
-	goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3)
+	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
 	// Ensure the value being sent is kept alive until the
 	// receiver copies it out. The sudog has a pointer to the
 	// stack object, but sudogs aren't considered as roots of the
@@ -261,6 +261,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
 		throw("G waiting list is corrupted")
 	}
 	gp.waiting = nil
+	gp.activeStackChans = false
 	if gp.param == nil {
 		if c.closed == 0 {
 			throw("chansend: spurious wakeup")
@@ -559,13 +560,14 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
 	mysg.c = c
 	gp.param = nil
 	c.recvq.enqueue(mysg)
-	goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3)
+	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
 
 	// someone woke us up
 	if mysg != gp.waiting {
 		throw("G waiting list is corrupted")
 	}
 	gp.waiting = nil
+	gp.activeStackChans = false
 	if mysg.releasetime > 0 {
 		blockevent(mysg.releasetime-t0, 2)
 	}
@@ -632,6 +634,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
 	goready(gp, skip+1)
 }
 
+func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
+	// There are unlocked sudogs that point into gp's stack. Stack
Stack + // copying must lock the channels of those sudogs. + gp.activeStackChans = true + unlock((*mutex)(chanLock)) + return true +} + // compiler implements // // select { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index a146f47446..bf56466e08 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -404,30 +404,36 @@ type g struct { stackguard0 uintptr // offset known to liblink stackguard1 uintptr // offset known to liblink - _panic *_panic // innermost panic - offset known to liblink - _defer *_defer // innermost defer - m *m // current m; offset known to arm liblink - sched gobuf - syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc - syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc - stktopsp uintptr // expected sp at top of stack, to check in traceback - param unsafe.Pointer // passed parameter on wakeup - atomicstatus uint32 - stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus - goid int64 - schedlink guintptr - waitsince int64 // approx time when the g become blocked - waitreason waitReason // if status==Gwaiting - preempt bool // preemption signal, duplicates stackguard0 = stackpreempt - preemptStop bool // transition to _Gpreempted on preemption; otherwise, just deschedule - paniconfault bool // panic (instead of crash) on unexpected fault address - gcscandone bool // g has scanned stack; protected by _Gscan bit in status - throwsplit bool // must not split stack - raceignore int8 // ignore race detection events - sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine - sysexitticks int64 // cputicks when syscall has returned (for tracing) - traceseq uint64 // trace event sequencer - tracelastp puintptr // last P emitted an event for this goroutine + _panic *_panic // innermost panic - offset known to liblink + _defer *_defer // innermost defer + m *m // current m; offset known to arm liblink + sched gobuf + syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc + syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc + stktopsp uintptr // expected sp at top of stack, to check in traceback + param unsafe.Pointer // passed parameter on wakeup + atomicstatus uint32 + stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus + goid int64 + schedlink guintptr + waitsince int64 // approx time when the g become blocked + waitreason waitReason // if status==Gwaiting + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + preemptStop bool // transition to _Gpreempted on preemption; otherwise, just deschedule + paniconfault bool // panic (instead of crash) on unexpected fault address + gcscandone bool // g has scanned stack; protected by _Gscan bit in status + throwsplit bool // must not split stack + // activeStackChans indicates that there are unlocked channels + // pointing into this goroutine's stack. If true, stack + // copying needs to acquire channel locks to protect these + // areas of the stack. 
+	activeStackChans bool
+
+	raceignore int8 // ignore race detection events
+	sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
+	sysexitticks int64 // cputicks when syscall has returned (for tracing)
+	traceseq uint64 // trace event sequencer
+	tracelastp puintptr // last P emitted an event for this goroutine
 	lockedm muintptr
 	sig uint32
 	writebuf []byte
diff --git a/src/runtime/select.go b/src/runtime/select.go
index d2c5a03a1a..8033b6512f 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -75,6 +75,9 @@ func selunlock(scases []scase, lockorder []uint16) {
 }
 
 func selparkcommit(gp *g, _ unsafe.Pointer) bool {
+	// There are unlocked sudogs that point into gp's stack. Stack
+	// copying must lock the channels of those sudogs.
+	gp.activeStackChans = true
 	// This must not access gp's stack (see gopark). In
 	// particular, it must not access the *hselect. That's okay,
 	// because by the time this is called, gp.waiting has all
@@ -311,6 +314,7 @@ loop:
 	// wait for someone to wake us up
 	gp.param = nil
 	gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
+	gp.activeStackChans = false
 
 	sellock(scases, lockorder)
 
diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go
index 406a38aad9..852244d425 100644
--- a/src/runtime/sizeof_test.go
+++ b/src/runtime/sizeof_test.go
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
 		_32bit uintptr // size on 32bit platforms
 		_64bit uintptr // size on 64bit platforms
 	}{
-		{runtime.G{}, 212, 368}, // g, but exported for testing
+		{runtime.G{}, 216, 376}, // g, but exported for testing
 	}
 
 	for _, tt := range tests {
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 2c2a88e6e1..e47f12a8dc 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -786,10 +786,6 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
 	}
 
 	// Lock channels to prevent concurrent send/receive.
-	// It's important that we *only* do this for async
-	// copystack; otherwise, gp may be in the middle of
-	// putting itself on wait queues and this would
-	// self-deadlock.
 	var lastc *hchan
 	for sg := gp.waiting; sg != nil; sg = sg.waitlink {
 		if sg.c != lastc {
@@ -826,12 +822,7 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
 
 // Copies gp's stack to a new stack of a different size.
 // Caller must have changed gp status to Gcopystack.
-//
-// If sync is true, this is a self-triggered stack growth and, in
-// particular, no other G may be writing to gp's stack (e.g., via a
-// channel operation). If sync is false, copystack protects against
-// concurrent channel operations.
-func copystack(gp *g, newsize uintptr, sync bool) {
+func copystack(gp *g, newsize uintptr) {
 	if gp.syscallsp != 0 {
 		throw("stack growth not allowed in system call")
 	}
@@ -857,15 +848,16 @@ func copystack(gp *g, newsize uintptr, sync bool) {
 
 	// Adjust sudogs, synchronizing with channel ops if necessary.
 	ncopy := used
-	if sync {
+	if !gp.activeStackChans {
 		adjustsudogs(gp, &adjinfo)
 	} else {
-		// sudogs can point in to the stack. During concurrent
-		// shrinking, these areas may be written to. Find the
-		// highest such pointer so we can handle everything
-		// there and below carefully. (This shouldn't be far
-		// from the bottom of the stack, so there's little
-		// cost in handling everything below it carefully.)
+		// sudogs may be pointing in to the stack and gp has
+		// released channel locks, so other goroutines could
+		// be writing to gp's stack. Find the highest such
+		// pointer so we can handle everything there and below
+		// carefully. (This shouldn't be far from the bottom
+		// of the stack, so there's little cost in handling
+		// everything below it carefully.)
 		adjinfo.sghi = findsghi(gp, old)
 
 		// Synchronize with channel ops and copy the part of
@@ -1040,7 +1032,7 @@ func newstack() {
 
 	// The concurrent GC will not scan the stack while we are doing the copy since
 	// the gp is in a Gcopystack status.
-	copystack(gp, newsize, true)
+	copystack(gp, newsize)
 	if stackDebug >= 1 {
 		print("stack grow done\n")
 	}
@@ -1120,7 +1112,7 @@ func shrinkstack(gp *g) {
 		print("shrinking stack ", oldsize, "->", newsize, "\n")
 	}
 
-	copystack(gp, newsize, false)
+	copystack(gp, newsize)
 }
 
 // freeStackSpans frees unused stack spans at the end of GC.
-- 
2.48.1
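
The sketch below is an illustration, not part of the patch: it restates the
synchronization rule this CL adopts using an ordinary sync.Mutex in place of
the runtime-internal hchan lock, and invented names (goroutineState,
channelState, parkCommit, copyStack) that do not exist in the Go runtime. The
point it demonstrates is the ordering the CL relies on: the parking goroutine
publishes activeStackChans before it drops the channel lock, and the stack
copier consults that flag, rather than a caller-supplied sync argument, to
decide whether it must take channel locks.

package main

import (
	"fmt"
	"sync"
)

// goroutineState and channelState are illustrative stand-ins for the
// runtime's g and hchan; these names are invented for this sketch.
type goroutineState struct {
	// activeStackChans mirrors g.activeStackChans: true while unlocked
	// channel sudogs may point into this goroutine's stack.
	activeStackChans bool
}

type channelState struct {
	lock sync.Mutex // stands in for hchan.lock
}

// parkCommit mirrors chanparkcommit: publish the flag first, then
// release the channel lock, so a stack copy racing with the unlock
// still sees that the stack is exposed.
func parkCommit(gp *goroutineState, c *channelState) {
	gp.activeStackChans = true
	c.lock.Unlock()
}

// copyStack mirrors the simplified copystack: instead of a sync
// parameter supplied by the caller, it derives from the goroutine's
// own flag whether channel locks must be taken around the copy.
func copyStack(gp *goroutineState, c *channelState) {
	if gp.activeStackChans {
		// Another goroutine may be writing into gp's stack through
		// a sudog, so hold the channel lock for the duration.
		c.lock.Lock()
		defer c.lock.Unlock()
	}
	fmt.Println("copying stack; channel lock held =", gp.activeStackChans)
}

func main() {
	gp := &goroutineState{}
	c := &channelState{}

	c.lock.Lock()               // blocked on the channel: lock held
	parkCommit(gp, c)           // park: set the flag, then unlock
	copyStack(gp, c)            // asynchronous shrink: must lock
	gp.activeStackChans = false // woken up: stack no longer exposed
	copyStack(gp, c)            // self-triggered growth: no locking needed
}

In the real runtime the copier may have to lock several distinct channels
(see the lastc loop in syncadjustsudogs), which this single-channel sketch
elides.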