From 62ba520b237afebd16e5c55113523f1511643fb1 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Mon, 26 Oct 2015 16:29:25 -0400
Subject: [PATCH] runtime: eliminate getfull barrier from concurrent mark
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Currently dedicated mark workers participate in the getfull barrier
during concurrent mark. However, the getfull barrier wasn't designed
for concurrent work and this causes no end of headaches.

In the concurrent setting, participants come and go. This makes mark
completion susceptible to live-lock: since dedicated workers are only
periodically polling for completion, it's possible for the program to
be in some transient worker each time one of the dedicated workers
wakes up to check if it can exit the getfull barrier. It also
complicates reasoning about the system because dedicated workers
participate directly in the getfull barrier, but transient workers
must instead use trygetfull because they have exit conditions that
aren't captured by getfull (e.g., fractional workers exit when
preempted). The complexity of implementing these exit conditions
contributed to #11677. Furthermore, the getfull barrier is inefficient
because we could be running user code instead of spinning on a P. In
effect, we're dedicating 25% of the CPU to marking even if that means
we have to spin to make that 25%. It also causes issues on Windows
because we can't actually sleep for 100µs (#8687).

Fix this by making dedicated workers no longer participate in the
getfull barrier. Instead, dedicated workers simply return to the
scheduler when they fail to get more work, regardless of what other
workers are doing, and the scheduler only starts new dedicated
workers if there's work available. Everything that needs to be
handled by this barrier is already handled by detection of mark
completion.

This makes the system much more symmetric because all workers and
assists now use trygetfull during concurrent mark. It also loosens
the 25% CPU target so that we can give some of that 25% back to user
code if there isn't enough work to keep the mark worker busy. And it
eliminates the problematic 100µs sleep on Windows during concurrent
mark (though not during mark termination).

The downside of this is that if we hit a bottleneck in the heap graph
that then expands back out, the system may shut down dedicated
workers and take a while to start them back up. We'll address this in
the next commit.

Updates #12041 and #8687.

No effect on the go1 benchmarks. This slows down the garbage
benchmark by 9%, but we'll more than make it up in the next commit.

name              old time/op  new time/op  delta
XBenchGarbage-12  5.80ms ± 2%  6.32ms ± 4%  +9.03%  (p=0.000 n=20+20)

Change-Id: I65100a9ba005a8b5cf97940798918672ea9dd09b
Reviewed-on: https://go-review.googlesource.com/16297
Reviewed-by: Rick Hudson
---
 src/runtime/mgc.go     | 110 +++++++++++++++++++----------
 src/runtime/mgcmark.go |  22 ++++++---
 2 files changed, 64 insertions(+), 68 deletions(-)
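An illustrative aside, not part of the change itself: the completion
detection that replaces the getfull barrier can be modeled outside the
runtime. Workers never block waiting for work; a completion point is
declared when the last active worker fails to get work and the queue is
still empty. The sketch below is a standalone toy in Go — markState,
tryGetWork, enter, and exit are made-up names that only loosely mirror
work.nwait, work.nproc, and gcMarkWorkAvailable.

package main

import (
	"fmt"
	"sync"
)

// markState is a toy stand-in for the runtime's mark state: pending
// work, the number of workers, and how many of them are idle.
type markState struct {
	mu    sync.Mutex
	queue []int // pending work items (stand-in for work buffers)
	nproc int   // total workers that may participate
	nwait int   // workers currently idle (like work.nwait)
	done  bool  // set once a completion point is reached
}

// tryGetWork hands out one work item or reports that none is
// available. Workers never block here; that is the analogue of using
// trygetfull instead of the getfull barrier.
func (s *markState) tryGetWork() (int, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.queue) == 0 {
		return 0, false
	}
	w := s.queue[len(s.queue)-1]
	s.queue = s.queue[:len(s.queue)-1]
	return w, true
}

// enter marks a worker as active (loosely mirrors decrementing
// work.nwait when a worker starts running).
func (s *markState) enter() {
	s.mu.Lock()
	s.nwait--
	s.mu.Unlock()
}

// exit is called when a worker fails to get more work. If it was the
// last active worker and the queue is empty, this is a completion
// point — the same shape of check as
// "incnwait == work.nproc && !gcMarkWorkAvailable(nil)".
func (s *markState) exit() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.nwait++
	if s.nwait == s.nproc && len(s.queue) == 0 {
		s.done = true
	}
}

func main() {
	s := &markState{queue: []int{1, 2, 3, 4, 5}, nproc: 2, nwait: 2}
	var wg sync.WaitGroup
	for i := 0; i < s.nproc; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.enter()
			// Drain until tryGetWork fails, then return to the
			// "scheduler" instead of spinning in a barrier.
			for {
				if _, ok := s.tryGetWork(); !ok {
					break
				}
			}
			s.exit()
		}()
	}
	wg.Wait()
	fmt.Println("completion point reached:", s.done)
}

In this toy, "completion point reached: true" is printed for any
interleaving, because no new work is ever generated — which is the
property the scheduler-side check in findRunnableGCWorker relies on.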
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 8f2f9278ec..1ab42a8105 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -250,8 +250,7 @@ type gcMarkWorkerMode int
 const (
 	// gcMarkWorkerDedicatedMode indicates that the P of a mark
 	// worker is dedicated to running that mark worker. The mark
-	// worker should run without preemption until concurrent mark
-	// is done.
+	// worker should run without preemption.
 	gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota
 
 	// gcMarkWorkerFractionalMode indicates that a P is currently
@@ -593,6 +592,36 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
 		return nil
 	}
 
+	if !gcMarkWorkAvailable(_p_) {
+		// No work to be done right now. This can happen at
+		// the end of the mark phase when there are still
+		// assists tapering off. Don't bother running a worker
+		// now because it'll just return immediately.
+		if work.nwait == work.nproc {
+			// There are also no workers, which
+			// means we've reached a completion point.
+			// There may not be any workers to
+			// signal it, so signal it here.
+			readied := false
+			if gcBlackenPromptly {
+				if work.bgMark1.done == 0 {
+					throw("completing mark 2, but bgMark1.done == 0")
+				}
+				readied = work.bgMark2.complete()
+			} else {
+				readied = work.bgMark1.complete()
+			}
+			if readied {
+				// complete just called ready,
+				// but we're inside the
+				// scheduler. Let it know that
+				// that's okay.
+				resetspinning()
+			}
+		}
+		return nil
+	}
+
 	decIfPositive := func(ptr *int64) bool {
 		if *ptr > 0 {
 			if xaddint64(ptr, -1) >= 0 {
@@ -612,36 +641,6 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
 		// else for a while, so kick everything out of its run
 		// queue.
 	} else {
-		if !gcMarkWorkAvailable(_p_) {
-			// No work to be done right now. This can
-			// happen at the end of the mark phase when
-			// there are still assists tapering off. Don't
-			// bother running background mark because
-			// it'll just return immediately.
-			if work.nwait == work.nproc {
-				// There are also no workers, which
-				// means we've reached a completion point.
-				// There may not be any workers to
-				// signal it, so signal it here.
-				readied := false
-				if gcBlackenPromptly {
-					if work.bgMark1.done == 0 {
-						throw("completing mark 2, but bgMark1.done == 0")
-					}
-					readied = work.bgMark2.complete()
-				} else {
-					readied = work.bgMark1.complete()
-				}
-				if readied {
-					// complete just called ready,
-					// but we're inside the
-					// scheduler. Let it know that
-					// that's okay.
-					resetspinning()
-				}
-			}
-			return nil
-		}
 		if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
 			// No more workers are need right now.
 			return nil
@@ -1368,46 +1367,37 @@ func gcBgMarkWorker(p *p) {
 			throw("work.nwait was > work.nproc")
 		}
 
-		done := false
 		switch p.gcMarkWorkerMode {
 		default:
 			throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
 		case gcMarkWorkerDedicatedMode:
-			gcDrain(&p.gcw, gcDrainBlock|gcDrainFlushBgCredit)
-			// gcDrain did the xadd(&work.nwait +1) to
-			// match the decrement above. It only returns
-			// at a mark completion point.
-			done = true
-			if !p.gcw.empty() {
-				throw("gcDrain returned with buffer")
-			}
+			gcDrain(&p.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
 		case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
 			gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+		}
 
-			// If we are nearing the end of mark, dispose
-			// of the cache promptly. We must do this
-			// before signaling that we're no longer
-			// working so that other workers can't observe
-			// no workers and no work while we have this
-			// cached, and before we compute done.
-			if gcBlackenPromptly {
-				p.gcw.dispose()
-			}
+		// If we are nearing the end of mark, dispose
+		// of the cache promptly. We must do this
+		// before signaling that we're no longer
+		// working so that other workers can't observe
+		// no workers and no work while we have this
+		// cached, and before we compute done.
+		if gcBlackenPromptly {
+			p.gcw.dispose()
+		}
 
-			// Was this the last worker and did we run out
-			// of work?
-			incnwait := xadd(&work.nwait, +1)
-			if incnwait > work.nproc {
-				println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode,
-					"work.nwait=", incnwait, "work.nproc=", work.nproc)
-				throw("work.nwait > work.nproc")
-			}
-			done = incnwait == work.nproc && !gcMarkWorkAvailable(nil)
+		// Was this the last worker and did we run out
+		// of work?
+		incnwait := xadd(&work.nwait, +1)
+		if incnwait > work.nproc {
+			println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode,
+				"work.nwait=", incnwait, "work.nproc=", work.nproc)
+			throw("work.nwait > work.nproc")
 		}
 
 		// If this worker reached a background mark completion
 		// point, signal the main GC goroutine.
-		if done {
+		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
 			if gcBlackenPromptly {
 				if work.bgMark1.done == 0 {
 					throw("completing mark 2, but bgMark1.done == 0")
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index ed8633c30f..0f1359669e 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -761,24 +761,29 @@ type gcDrainFlags int
 
 const (
 	gcDrainUntilPreempt gcDrainFlags = 1 << iota
+	gcDrainNoBlock
 	gcDrainFlushBgCredit
 
-	// gcDrainBlock is the opposite of gcDrainUntilPreempt. This
-	// is the default, but callers should use the constant for
-	// documentation purposes.
+	// gcDrainBlock means neither gcDrainUntilPreempt or
+	// gcDrainNoBlock. It is the default, but callers should use
+	// the constant for documentation purposes.
 	gcDrainBlock gcDrainFlags = 0
 )
 
 // gcDrain scans roots and objects in work buffers, blackening grey
 // objects until all roots and work buffers have been drained.
 //
-// If flags&gcDrainUntilPreempt != 0, gcDrain also returns if
-// g.preempt is set. Otherwise, this will block until all dedicated
-// workers are blocked in gcDrain.
+// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
+// is set. This implies gcDrainNoBlock.
+//
+// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
+// unable to get more work. Otherwise, it will block until all
+// blocking calls are blocked in gcDrain.
 //
 // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
 // credit to gcController.bgScanCredit every gcCreditSlack units of
 // scan work.
+//
 //go:nowritebarrier
 func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 	if !writeBarrierEnabled {
@@ -786,7 +791,8 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 	}
 
 	gp := getg()
-	blocking := flags&gcDrainUntilPreempt == 0
+	preemtible := flags&gcDrainUntilPreempt != 0
+	blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
 	flushBgCredit := flags&gcDrainFlushBgCredit != 0
 
 	// Drain root marking jobs.
@@ -804,7 +810,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 	initScanWork := gcw.scanWork
 
 	// Drain heap marking jobs.
-	for blocking || !gp.preempt {
+	for !(preemtible && gp.preempt) {
 		// If another proc wants a pointer, give it some.
 		if work.nwait > 0 && work.full == 0 {
 			gcw.balance()
-- 
2.48.1
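A closing note on the gcDrain flag semantics in the mgcmark.go hunks:
the two derived booleans and the new loop condition can be exercised in
isolation. This is a sketch only — drainUntilPreempt, drainNoBlock,
drainBlock, and describe are local stand-ins that mirror the patched
logic, not the runtime's own declarations.

package main

import "fmt"

type drainFlags int

const (
	drainUntilPreempt drainFlags = 1 << iota // return when the goroutine is preempted
	drainNoBlock                             // return as soon as no work is available
	drainFlushBgCredit
	drainBlock drainFlags = 0 // neither flag set: block until work appears
)

// describe derives the same two booleans gcDrain computes after this
// patch and reports whether the drain loop would keep running.
func describe(flags drainFlags, preempted bool) {
	preemptible := flags&drainUntilPreempt != 0
	blocking := flags&(drainUntilPreempt|drainNoBlock) == 0
	keepLooping := !(preemptible && preempted)
	fmt.Printf("flags=%03b preempted=%-5v blocking=%-5v keepLooping=%v\n",
		flags, preempted, blocking, keepLooping)
}

func main() {
	describe(drainNoBlock|drainFlushBgCredit, false)     // dedicated worker after this change
	describe(drainUntilPreempt|drainFlushBgCredit, true) // fractional/idle worker being preempted
	describe(drainBlock, false)                          // mark termination still blocks
}

With these inputs only the drainBlock case reports blocking=true, which
matches the commit message: workers no longer block during concurrent
mark, while mark termination still uses the blocking mode.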