]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: exit idle worker if there's higher-priority work
authorAustin Clements <austin@google.com>
Mon, 31 Oct 2016 00:20:17 +0000 (20:20 -0400)
committerAustin Clements <austin@google.com>
Sun, 20 Nov 2016 22:44:17 +0000 (22:44 +0000)
Idle GC workers trigger whenever there's a GC running and the
scheduler doesn't find any other work. However, they currently run for
a full scheduler quantum (~10ms) once started.

This is really bad for event-driven applications, where work may come
in on the network hundreds of times during that window. In the
go-gcbench rpc benchmark, this is bad enough to often cause effective
STWs where all Ps are in the idle worker. When this happens, we don't
even poll the network any more (except for the background 10ms poll in
sysmon), so we don't even know there's more work to do.

Fix this by making idle workers check with the scheduler roughly every
100 µs to see if there's any higher-priority work the P should be
doing. This check includes polling the network for incoming work.

Fixes #16528.

Change-Id: I6f62ebf6d36a92368da9891bafbbfd609b9bd003
Reviewed-on: https://go-review.googlesource.com/32433
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
src/runtime/mgc.go
src/runtime/mgcmark.go
src/runtime/proc.go

index 430b7aa657a82fe07eafb8e37616ea8e3a4e704c..58b52e8bed416511383247182c7e1f9ed7a7b6a1 100644 (file)
@@ -1497,8 +1497,10 @@ func gcBgMarkWorker(_p_ *p) {
                                throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
                        case gcMarkWorkerDedicatedMode:
                                gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
-                       case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
+                       case gcMarkWorkerFractionalMode:
                                gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+                       case gcMarkWorkerIdleMode:
+                               gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
                        }
                        casgstatus(gp, _Gwaiting, _Grunning)
                })
index cfd24e06fb684282e9d6742eb93c0081f18a8784..85130bf22799e64c0e8d41402e7559a499dfddca 100644 (file)
@@ -33,6 +33,14 @@ const (
        // This must be > _MaxSmallSize so that the object base is the
        // span base.
        maxObletBytes = 128 << 10
+
+       // idleCheckThreshold specifies how many units of work to do
+       // between run queue checks in an idle worker. Assuming a scan
+       // rate of 1 MB/ms, this is ~100 µs. Lower values have higher
+       // overhead in the scan loop (the scheduler check may perform
+       // a syscall, so its overhead is nontrivial). Higher values
+       // make the system less responsive to incoming work.
+       idleCheckThreshold = 100000
 )
 
 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
@@ -991,6 +999,7 @@ const (
        gcDrainUntilPreempt gcDrainFlags = 1 << iota
        gcDrainNoBlock
        gcDrainFlushBgCredit
+       gcDrainIdle
 
        // gcDrainBlock means neither gcDrainUntilPreempt or
        // gcDrainNoBlock. It is the default, but callers should use
@@ -1004,6 +1013,9 @@ const (
 // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
 // is set. This implies gcDrainNoBlock.
 //
+// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
+// to do. This implies gcDrainNoBlock.
+//
 // If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
 // unable to get more work. Otherwise, it will block until all
 // blocking calls are blocked in gcDrain.
@@ -1020,8 +1032,14 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 
        gp := getg().m.curg
        preemptible := flags&gcDrainUntilPreempt != 0
-       blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
+       blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
        flushBgCredit := flags&gcDrainFlushBgCredit != 0
+       idle := flags&gcDrainIdle != 0
+
+       initScanWork := gcw.scanWork
+       // idleCheck is the scan work at which to perform the next
+       // idle check with the scheduler.
+       idleCheck := initScanWork + idleCheckThreshold
 
        // Drain root marking jobs.
        if work.markrootNext < work.markrootJobs {
@@ -1031,11 +1049,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
                                break
                        }
                        markroot(gcw, job)
+                       if idle && pollWork() {
+                               goto done
+                       }
                }
        }
 
-       initScanWork := gcw.scanWork
-
        // Drain heap marking jobs.
        for !(preemptible && gp.preempt) {
                // Try to keep work available on the global queue. We used to
@@ -1071,7 +1090,15 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
                                gcFlushBgCredit(gcw.scanWork - initScanWork)
                                initScanWork = 0
                        }
+                       idleCheck -= gcw.scanWork
                        gcw.scanWork = 0
+
+                       if idle && idleCheck <= 0 {
+                               idleCheck += idleCheckThreshold
+                               if pollWork() {
+                                       break
+                               }
+                       }
                }
        }
 
@@ -1079,6 +1106,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
        // point because we must preserve the condition that the work
        // buffers are empty.
 
+done:
        // Flush remaining scan work credit.
        if gcw.scanWork > 0 {
                atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
index 90e6d18ee2107a160f76686f1ea3a4bd04212cf8..1b21b37de8346aab2a8cf6a0bc70bd39aeace416 100644 (file)
@@ -2053,6 +2053,27 @@ stop:
        goto top
 }
 
+// pollWork returns true if there is non-background work this P could
+// be doing. This is a fairly lightweight check to be used for
+// background work loops, like idle GC. It checks a subset of the
+// conditions checked by the actual scheduler.
+func pollWork() bool {
+       if sched.runqsize != 0 {
+               return true
+       }
+       p := getg().m.p.ptr()
+       if !runqempty(p) {
+               return true
+       }
+       if netpollinited() && sched.lastpoll != 0 {
+               if gp := netpoll(false); gp != nil {
+                       injectglist(gp)
+                       return true
+               }
+       }
+       return false
+}
+
 func resetspinning() {
        _g_ := getg()
        if !_g_.m.spinning {