gp.waiting = mysg
gp.param = nil
c.sendq.enqueue(mysg)
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
// chanparkcommit is the gopark unlock function used when a goroutine
// blocks on a channel send or receive (see the gopark calls above). It
// publishes gp.activeStackChans, clears gp.parkingOnChan, and only then
// releases the channel lock — the statement order here is load-bearing.
func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
+ // Set activeStackChans here instead of before we try parking
+ // because we could self-deadlock in stack growth on the
+ // channel lock.
gp.activeStackChans = true
+ // Mark that it's safe for stack shrinking to occur now,
+ // because any thread acquiring this G's stack for shrinking
+ // is guaranteed to observe activeStackChans after this store.
+ atomic.Store8(&gp.parkingOnChan, 0)
+ // Make sure we unlock after setting activeStackChans and
+ // unsetting parkingOnChan. The moment we unlock chanLock
+ // we risk gp getting readied by a channel operation and
+ // so gp could continue running before everything before
+ // the unlock is visible (even to gp itself).
unlock((*mutex)(chanLock))
return true
}
<-done
}
+func TestNoShrinkStackWhileParking(t *testing.T) {
+ // The goal of this test is to trigger a "racy sudog adjustment"
+ // throw. Basically, there's a window between when a goroutine
+ // becomes available for preemption for stack scanning (and thus,
+ // stack shrinking) but before the goroutine has fully parked on a
+ // channel. See issue 40641 for more details on the problem.
+ //
+ // The way we try to induce this failure is to set up two
+ // goroutines: a sender and a receiver that communicate across
+ // a channel. We try to set up a situation where the sender
+ // grows its stack temporarily then *fully* blocks on a channel
+ // often. Meanwhile a GC is triggered so that we try to get a
+ // mark worker to shrink the sender's stack and race with the
+ // sender parking.
+ //
+ // Unfortunately the race window here is so small that we
+ // either need a ridiculous number of iterations, or we add
+ // "usleep(1000)" to park_m, just before the unlockf call.
+ const n = 10
+ send := func(c chan<- int, done chan struct{}) {
+ for i := 0; i < n; i++ {
+ c <- i
+ // Use lots of stack briefly so that
+ // the GC is going to want to shrink us
+ // when it scans us. Make sure not to
+ // do any function calls otherwise
+ // in order to avoid us shrinking ourselves
+ // when we're preempted.
+ stackGrowthRecursive(20)
+ }
+ done <- struct{}{}
+ }
+ recv := func(c <-chan int, done chan struct{}) {
+ for i := 0; i < n; i++ {
+ // Sleep here so that the sender always
+ // fully blocks.
+ time.Sleep(10 * time.Microsecond)
+ <-c
+ }
+ done <- struct{}{}
+ }
+ for i := 0; i < n*20; i++ {
+ c := make(chan int)
+ done := make(chan struct{})
+ go recv(c, done)
+ go send(c, done)
+ // Wait a little bit before triggering
+ // the GC to make sure the sender and
+ // receiver have gotten into their groove.
+ time.Sleep(50 * time.Microsecond)
+ runtime.GC()
+ <-done
+ <-done
+ }
+}
+
func TestSelectDuplicateChannel(t *testing.T) {
// This test makes sure we can queue a G on
// the same channel multiple times.
<-done
}
+// padData is a package-level array that is read, and never assigned,
+// by the visible code. Copying it into the local pad below forces the
+// ~1 KiB frame to actually be materialized; presumably this defeats
+// optimizations that would otherwise elide pad — TODO confirm against
+// the compiler's behavior.
+var padData [128]uint64
+
// stackGrowthRecursive consumes roughly 1 KiB of stack per level
// (128 * 8 bytes of locals), recursing i times, to force stack growth.
func stackGrowthRecursive(i int) {
var pad [128]uint64
- if i != 0 && pad[0] == 0 {
+ pad = padData
+ // padData is never written in the code in view, so this scan
+ // apparently never finds a nonzero element and never returns
+ // early; it looks like it exists only to keep pad live — confirm.
+ for j := range pad {
+ if pad[j] != 0 {
+ return
+ }
+ }
+ if i != 0 {
stackGrowthRecursive(i - 1)
}
}
// copying needs to acquire channel locks to protect these
// areas of the stack.
activeStackChans bool
+ // parkingOnChan indicates that the goroutine is about to
+ // park on a chansend or chanrecv. Used to signal an unsafe point
+ // for stack shrinking. It's a boolean value, but is updated atomically.
+ parkingOnChan uint8
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
// This file contains the implementation of Go select statements.
import (
+ "runtime/internal/atomic"
"unsafe"
)
func selparkcommit(gp *g, _ unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
+ // Set activeStackChans here instead of before we try parking
+ // because we could self-deadlock in stack growth on a
+ // channel lock.
gp.activeStackChans = true
+ // Mark that it's safe for stack shrinking to occur now,
+ // because any thread acquiring this G's stack for shrinking
+ // is guaranteed to observe activeStackChans after this store.
+ atomic.Store8(&gp.parkingOnChan, 0)
+ // Make sure we unlock after setting activeStackChans and
+ // unsetting parkingOnChan. The moment we unlock any of the
+ // channel locks we risk gp getting readied by a channel operation
+ // and so gp could continue running before everything before the
+ // unlock is visible (even to gp itself).
+
// This must not access gp's stack (see gopark). In
// particular, it must not access the *hselect. That's okay,
// because by the time this is called, gp.waiting has all
// wait for someone to wake us up
gp.param = nil
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
gp.activeStackChans = false
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans {
+ if newsize < old.hi-old.lo && atomic.Load8(&gp.parkingOnChan) != 0 {
+ // It's not safe for someone to shrink this stack while we're actively
+ // parking on a channel, but it is safe to grow since we do that
+ // ourselves and explicitly don't want to synchronize with channels
+ // since we could self-deadlock.
+ throw("racy sudog adjustment due to parking on channel")
+ }
adjustsudogs(gp, &adjinfo)
} else {
// sudogs may be pointing in to the stack and gp has
// We also can't copy the stack if we're at an asynchronous
// safe-point because we don't have precise pointer maps for
// all frames.
- return gp.syscallsp == 0 && !gp.asyncSafePoint
+ //
+ // We also can't *shrink* the stack in the window between the
+ // goroutine calling gopark to park on a channel and
+ // gp.activeStackChans being set.
+ return gp.syscallsp == 0 && !gp.asyncSafePoint && atomic.Load8(&gp.parkingOnChan) == 0
}
// Maybe shrink the stack being used by gp.