runtime: fix CPU underutilization

author Dmitry Vyukov <dvyukov@google.com>

Fri, 18 Mar 2016 15:34:11 +0000 (16:34 +0100)

committer Dmitry Vyukov <dvyukov@google.com>

Tue, 3 May 2016 10:06:32 +0000 (10:06 +0000)
author Dmitry Vyukov <dvyukov@google.com>
Fri, 18 Mar 2016 15:34:11 +0000 (16:34 +0100)
committer Dmitry Vyukov <dvyukov@google.com>
Tue, 3 May 2016 10:06:32 +0000 (10:06 +0000)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go

index fd33c9c3c8762f3940bd3204770e1b8f96799917..199a049431a93b5dd957f656f1411c30e6f65527 100644 (file)
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -117,6 +117,38 @@ func RunSchedLocalQueueStealTest() {
         }
  }
  
+func RunSchedLocalQueueEmptyTest(iters int) {
+       // Test that runq is not spuriously reported as empty.
+       // Runq emptiness affects scheduling decisions and spurious emptiness
+       // can lead to underutilization (both runnable Gs and idle Ps coexist
+       // for arbitrary long time).
+       done := make(chan bool, 1)
+       p := new(p)
+       gs := make([]g, 2)
+       ready := new(uint32)
+       for i := 0; i < iters; i++ {
+               *ready = 0
+               next0 := (i & 1) == 0
+               next1 := (i & 2) == 0
+               runqput(p, &gs[0], next0)
+               go func() {
+                       for atomic.Xadd(ready, 1); atomic.Load(ready) != 2; {
+                       }
+                       if runqempty(p) {
+                               println("next:", next0, next1)
+                               throw("queue is empty")
+                       }
+                       done <- true
+               }()
+               for atomic.Xadd(ready, 1); atomic.Load(ready) != 2; {
+               }
+               runqput(p, &gs[1], next1)
+               runqget(p)
+               <-done
+               runqget(p)
+       }
+}
+
  var StringHash = stringHash
  var BytesHash = bytesHash
  var Int32Hash = int32Hash
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index ee732e3cf7db092f3debe088c41bf623ae2142bc..e03059080db0158f4ab093796e37d689fee8012b 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3921,9 +3921,20 @@ func pidleget() *p {
  }
  
  // runqempty returns true if _p_ has no Gs on its local run queue.
-// Note that this test is generally racy.
+// It never returns true spuriously.
  func runqempty(_p_ *p) bool {
-       return _p_.runqhead == _p_.runqtail && _p_.runnext == 0
+       // Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail,
+       // 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runqnext.
+       // Simply observing that runqhead == runqtail and then observing that runqnext == nil
+       // does not mean the queue is empty.
+       for {
+               head := atomic.Load(&_p_.runqhead)
+               tail := atomic.Load(&_p_.runqtail)
+               runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext)))
+               if tail == atomic.Load(&_p_.runqtail) {
+                       return head == tail && runnext == 0
+               }
+       }
  }
  
  // To shake out latent assumptions about scheduling order,
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go

index b1d7f75870e602856b1d4ab48f3acfea97081d2e..899412107199545a5379c76724291380026d69f0 100644 (file)
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -553,6 +553,24 @@ func TestSchedLocalQueueSteal(t *testing.T) {
         runtime.RunSchedLocalQueueStealTest()
  }
  
+func TestSchedLocalQueueEmpty(t *testing.T) {
+       if runtime.NumCPU() == 1 {
+               // Takes too long and does not trigger the race.
+               t.Skip("skipping on uniprocessor")
+       }
+       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+
+       // If runtime triggers a forced GC during this test then it will deadlock,
+       // since the goroutines can't be stopped/preempted during spin wait.
+       defer debug.SetGCPercent(debug.SetGCPercent(-1))
+
+       iters := int(1e5)
+       if testing.Short() {
+               iters = 1e2
+       }
+       runtime.RunSchedLocalQueueEmptyTest(iters)
+}
+
  func benchmarkStackGrowth(b *testing.B, rec int) {
         b.RunParallel(func(pb *testing.PB) {
                 for pb.Next() {
author	Dmitry Vyukov <dvyukov@google.com>
	Fri, 18 Mar 2016 15:34:11 +0000 (16:34 +0100)
committer	Dmitry Vyukov <dvyukov@google.com>
	Tue, 3 May 2016 10:06:32 +0000 (10:06 +0000)
src/runtime/export_test.go		patch \| blob \| history
src/runtime/proc.go		patch \| blob \| history
src/runtime/proc_test.go		patch \| blob \| history