]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: scan mark worker stacks like normal
authorAustin Clements <austin@google.com>
Mon, 24 Oct 2016 18:20:07 +0000 (14:20 -0400)
committerAustin Clements <austin@google.com>
Wed, 26 Oct 2016 18:13:16 +0000 (18:13 +0000)
Currently, markroot delays scanning mark worker stacks until mark
termination by putting the mark worker G directly on the rescan list
when it encounters one during the mark phase. Without this, since mark
workers are non-preemptible, two mark workers that attempt to scan
each other's stacks can deadlock.

However, this is annoyingly asymmetric and causes some real problems.
First, markroot does not own the G at that point, so it's not
technically safe to add it to the rescan list. I haven't been able to
find a specific problem this could cause, but I suspect it's the root
cause of issue #17099. Second, this will interfere with the hybrid
barrier, since there is no stack rescanning during mark termination
with the hybrid barrier.

This commit switches to a different approach. We move the mark
worker's call to gcDrain to the system stack and set the mark worker's
status to _Gwaiting for the duration of the drain to indicate that
it's preemptible. This lets another mark worker scan its G stack while
the drain is running on the system stack. We don't return to the G
stack until we can switch back to _Grunning, which ensures we don't
race with a stack scan. This lets us eliminate the special case for
mark worker stack scans and scan them just like any other goroutine.
The only subtlety to this approach is that we have to disable stack
shrinking for mark workers; they could be referring to captured
variables from the G stack, so it's not safe to move their stacks.

Updates #17099 and #17503.

Change-Id: Ia5213949ec470af63e24dfce01df357c12adbbea
Reviewed-on: https://go-review.googlesource.com/31820
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
src/runtime/mgc.go
src/runtime/mgcmark.go
src/runtime/stack.go

index 94a6e006852298aac55f7faa85572b530ea22d7c..8c50e9fb79a428fc7d26979e98a455c527d004b5 100644 (file)
@@ -1461,14 +1461,25 @@ func gcBgMarkWorker(_p_ *p) {
                        throw("work.nwait was > work.nproc")
                }
 
-               switch _p_.gcMarkWorkerMode {
-               default:
-                       throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
-               case gcMarkWorkerDedicatedMode:
-                       gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
-               case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
-                       gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
-               }
+               systemstack(func() {
+                       // Mark our goroutine preemptible so its stack
+                       // can be scanned. This lets two mark workers
+                       // scan each other (otherwise, they would
+                       // deadlock). We must not modify anything on
+                       // the G stack. However, stack shrinking is
+                       // disabled for mark workers, so it is safe to
+                       // read from the G stack.
+                       casgstatus(gp, _Grunning, _Gwaiting)
+                       switch _p_.gcMarkWorkerMode {
+                       default:
+                               throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
+                       case gcMarkWorkerDedicatedMode:
+                               gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
+                       case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
+                               gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+                       }
+                       casgstatus(gp, _Gwaiting, _Grunning)
+               })
 
                // If we are nearing the end of mark, dispose
                // of the cache promptly. We must do this
index e62e470217055b2ef37e0593ed4e8bade023ac8b..9489a0a3445fc33169b9ee5cf66db1d645071b6a 100644 (file)
@@ -221,24 +221,13 @@ func markroot(gcw *gcWork, i uint32) {
                        gp.waitsince = work.tstart
                }
 
-               if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC && readgstatus(gp) != _Gdead {
-                       // GC background workers may be
-                       // non-preemptible, so we may deadlock if we
-                       // try to scan them during a concurrent phase.
-                       // They also have tiny stacks, so just ignore
-                       // them until mark termination.
-                       gp.gcscandone = true
-                       queueRescan(gp)
-                       break
-               }
-
                // scang must be done on the system stack in case
                // we're trying to scan our own stack.
                systemstack(func() {
                        // If this is a self-scan, put the user G in
                        // _Gwaiting to prevent self-deadlock. It may
-                       // already be in _Gwaiting if this is mark
-                       // termination.
+                       // already be in _Gwaiting if this is mark
+                       // worker or we're in mark termination.
                        userG := getg().m.curg
                        selfScan := gp == userG && readgstatus(userG) == _Grunning
                        if selfScan {
index e803dc17a0df81a8fb296b07d80cb9922168a759..dfc71b41c3d85f736d4a787933df2bb9beae8b8a 100644 (file)
@@ -1122,6 +1122,11 @@ func shrinkstack(gp *g) {
        if debug.gcshrinkstackoff > 0 {
                return
        }
+       if gp.startpc == gcBgMarkWorkerPC {
+               // We're not allowed to shrink the gcBgMarkWorker
+               // stack (see gcBgMarkWorker for explanation).
+               return
+       }
 
        oldsize := gp.stackAlloc
        newsize := oldsize / 2