mysg.elem = ep
mysg.waitlink = nil
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.waiting = mysg
gp.param = nil
mysg.waitlink = nil
gp.waiting = mysg
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
sgp.next = nil // mark as removed (see dequeueSudog)
}
- // if sgp participates in a select and is already signaled, ignore it
- if sgp.selectdone != nil {
- // claim the right to signal
- if *sgp.selectdone != 0 || !atomic.Cas(sgp.selectdone, 0, 1) {
+ // if a goroutine was put on this queue because of a
+ // select, there is a small window between the goroutine
+ // being woken up by a different case and it grabbing the
+ // channel locks. Once it has the lock
+ // it removes itself from the queue, so we won't see it after that.
+ // We use a flag in the G struct to tell us when someone
+ // else has won the race to signal this goroutine but the goroutine
+ // hasn't removed itself from the queue yet.
+ if sgp.isSelect {
+ if !atomic.Cas(&sgp.g.selectDone, 0, 1) {
continue
}
}
// channel this sudog is blocking on. shrinkstack depends on
// this for sudogs involved in channel ops.
- g *g
- selectdone *uint32 // CAS to 1 to win select race (may point to stack)
- next *sudog
- prev *sudog
- elem unsafe.Pointer // data element (may point to stack)
+ g *g
+
+ // isSelect indicates g is participating in a select, so
+ // g.selectDone must be CAS'd to win the wake-up race.
+ isSelect bool
+ next *sudog
+ prev *sudog
+ elem unsafe.Pointer // data element (may point to stack)
// The following fields are never accessed concurrently.
// For channels, waitlink is only accessed by g.
cgoCtxt []uintptr // cgo traceback context
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
+ selectDone uint32 // are we participating in a select and did someone win the race?
// Per-G GC state
var (
gp *g
- done uint32
sg *sudog
c *hchan
k *scase
// pass 2 - enqueue on all chans
gp = getg()
- done = 0
if gp.waiting != nil {
throw("gp.waiting != nil")
}
c = cas.c
sg := acquireSudog()
sg.g = gp
- // Note: selectdone is adjusted for stack copies in stack1.go:adjustsudogs
- sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done)))
+ sg.isSelect = true
// No stack splits between assigning elem and enqueuing
// sg on gp.waiting where copystack can find it.
sg.elem = cas.elem
gp.param = nil
gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1)
- // While we were asleep, some goroutine came along and completed
- // one of the cases in the select and woke us up (called ready).
- // As part of that process, the goroutine did a cas on done above
- // (aka *sg.selectdone for all queued sg) to win the right to
- // complete the select. Now done = 1.
- //
- // If we copy (grow) our own stack, we will update the
- // selectdone pointers inside the gp.waiting sudog list to point
- // at the new stack. Another goroutine attempting to
- // complete one of our (still linked in) select cases might
- // see the new selectdone pointer (pointing at the new stack)
- // before the new stack has real data; if the new stack has done = 0
- // (before the old values are copied over), the goroutine might
- // do a cas via sg.selectdone and incorrectly believe that it has
- // won the right to complete the select, executing a second
- // communication and attempting to wake us (call ready) again.
- //
- // Then things break.
- //
- // The best break is that the goroutine doing ready sees the
- // _Gcopystack status and throws, as in #17007.
- // A worse break would be for us to continue on, start running real code,
- // block in a semaphore acquisition (sema.go), and have the other
- // goroutine wake us up without having really acquired the semaphore.
- // That would result in the goroutine spuriously running and then
- // queue up another spurious wakeup when the semaphore really is ready.
- // In general the situation can cascade until something notices the
- // problem and causes a crash.
- //
- // A stack shrink does not have this problem, because it locks
- // all the channels that are involved first, blocking out the
- // possibility of a cas on selectdone.
- //
- // A stack growth before gopark above does not have this
- // problem, because we hold those channel locks (released by
- // selparkcommit).
- //
- // A stack growth after sellock below does not have this
- // problem, because again we hold those channel locks.
- //
- // The only problem is a stack growth during sellock.
- // To keep that from happening, run sellock on the system stack.
- //
- // It might be that we could avoid this if copystack copied the
- // stack before calling adjustsudogs. In that case,
- // syncadjustsudogs would need to recopy the tiny part that
- // it copies today, resulting in a little bit of extra copying.
- //
- // An even better fix, not for the week before a release candidate,
- // would be to put space in every sudog and make selectdone
- // point at (say) the space in the first sudog.
-
- systemstack(func() {
- sellock(scases, lockorder)
- })
+ sellock(scases, lockorder)
+ gp.selectDone = 0
sg = (*sudog)(gp.param)
gp.param = nil
sglist = gp.waiting
// Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
- sg1.selectdone = nil
+ sg1.isSelect = false
sg1.elem = nil
sg1.c = nil
}