runtime: fix CPU underutilization
author    Dmitriy Vyukov <dvyukov@google.com>
          Thu, 27 Jun 2013 16:52:12 +0000 (20:52 +0400)
committer Dmitriy Vyukov <dvyukov@google.com>
          Thu, 27 Jun 2013 16:52:12 +0000 (20:52 +0400)
runtime.newproc/ready are deliberately sloppy about waking new M's:
they only ensure that there is at least one spinning M.
Currently, to compensate for that, schedule() checks whether the current P
has local work while no M's are spinning, and if so wakes up another M.
That check does not help if goroutines do not call schedule.
With this change, a spinning M wakes up another M when it finds work to do
(sketched in the toy model below).
This is still not ideal, but it fixes the underutilization.
A proper check would require knowing the exact number of runnable G's,
which is too expensive to maintain.
Fixes #5586.

R=rsc
CC=gobot, golang-dev
https://golang.org/cl/9776044
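
To make the wake-up rule above concrete, here is a toy, self-contained Go
model of the spinning-M bookkeeping. It is illustrative only: the real
accounting lives in src/pkg/runtime/proc.c and uses runtime-internal atomics
(runtime·xadd, runtime·atomicload). The names nmspinning, npidle, and wakep
mirror the runtime's; findWork and the counter seeding in main are invented
for the sketch.

package main

import (
	"fmt"
	"sync/atomic"
)

var (
	nmspinning int32 // M's currently spinning in search of work
	npidle     int32 // idle P's with no M running them
)

// findWork models the tail of findrunnable: once a spinning M finds a
// goroutine, it stops spinning, and if no other M is left spinning while
// idle P's remain, it wakes one up so queued work is not stranded.
func findWork(wasSpinning bool) {
	var n int32
	if wasSpinning {
		n = atomic.AddInt32(&nmspinning, -1)
		if n < 0 {
			panic("findWork: negative nmspinning")
		}
	} else {
		n = atomic.LoadInt32(&nmspinning)
	}
	if n == 0 && atomic.LoadInt32(&npidle) > 0 {
		wakep() // stand-in for the runtime's wakep()
	}
}

func wakep() { fmt.Println("wake another M to run an idle P") }

func main() {
	atomic.StoreInt32(&npidle, 2)   // two P's sit idle
	atomic.AddInt32(&nmspinning, 1) // one M starts spinning
	findWork(true)                  // it finds work: hand off the search
}

Before this change the equivalent check ran only in schedule(), so an M that
found work while spinning simply stopped spinning, and the last chance to
wake a peer for the remaining work was lost.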

src/pkg/runtime/proc.c
src/pkg/runtime/proc_test.go

index e6844032a605af2421f143f26baab8f45462c626..6dcf564cb07343ce51d936301b49aa0aa3aa8aaf 100644 (file)
@@ -1018,7 +1018,7 @@ execute(G *gp)
 // Finds a runnable goroutine to execute.
 // Tries to steal from other P's, get g from global queue, poll network.
 static G*
-findrunnable(void)
+findrunnable1(void)
 {
        G *gp;
        P *p;
@@ -1127,6 +1127,29 @@ stop:
        goto top;
 }
 
+static G*
+findrunnable(void)
+{
+       G *gp;
+       int32 nmspinning;
+
+       gp = findrunnable1();  // blocks until work is available
+       if(m->spinning) {
+               m->spinning = false;
+               nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
+               if(nmspinning < 0)
+                       runtime·throw("findrunnable: negative nmspinning");
+       } else
+               nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
+
+       // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+       // so see if we need to wakeup another P here.
+       if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
+               wakep();
+
+       return gp;
+}
+
 // Injects the list of runnable G's into the scheduler.
 // Can run concurrently with GC.
 static void
@@ -1185,21 +1208,11 @@ top:
                        runtime·throw("schedule: spinning with local work");
        }
        if(gp == nil)
-               gp = findrunnable();
-
-       if(m->spinning) {
-               m->spinning = false;
-               runtime·xadd(&runtime·sched.nmspinning, -1);
-       }
-
-       // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
-       // so see if we need to wakeup another M here.
-       if (m->p->runqhead != m->p->runqtail &&
-               runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
-               runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
-               wakep();
+               gp = findrunnable();  // blocks until work is available
 
        if(gp->lockedm) {
+               // Hands off own p to the locked m,
+               // then blocks waiting for a new p.
                startlockedm(gp);
                goto top;
        }
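
The failure mode this hunk fixes shows up with CPU-bound goroutines that
never re-enter the scheduler. Below is a hypothetical, simplified reproducer
in the spirit of issue #5586; the function spin, the GOMAXPROCS value, and
the iteration count are invented for illustration, not taken from the
original report.

package main

import (
	"runtime"
	"sync"
)

// spin burns CPU without ever reaching the scheduler: no channel
// operations, no blocking calls.
func spin(n int) int {
	s := 0
	for i := 0; i < n; i++ {
		s += i
	}
	return s
}

func main() {
	runtime.GOMAXPROCS(4)
	var wg sync.WaitGroup
	for p := 0; p < 4; p++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			spin(1 << 30)
		}()
	}
	wg.Wait()
}

With GOMAXPROCS=4, the four loops should finish in roughly the time of one.
Under the old policy some of them could end up sharing a single P while the
other P's idled, because nothing ever called schedule() to trigger the
wakeup check.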
index 83368e0c33e6d2378dc1e62a21de59a9ce79718f..c72d54edbe5d4b5c776fae947a7fc86fcbbc7c40 100644 (file)
@@ -93,6 +93,30 @@ func TestYieldLocked(t *testing.T) {
        <-c
 }
 
+func TestGoroutineParallelism(t *testing.T) {
+       const P = 4
+       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+       for try := 0; try < 10; try++ {
+               done := make(chan bool)
+               x := uint32(0)
+               for p := 0; p < P; p++ {
+                       // Test that all P goroutines are scheduled at the same time
+                       go func(p int) {
+                               for i := 0; i < 3; i++ {
+                                       expected := uint32(P*i + p)
+                                       for atomic.LoadUint32(&x) != expected {
+                                       }
+                                       atomic.StoreUint32(&x, expected+1)
+                               }
+                               done <- true
+                       }(p)
+               }
+               for p := 0; p < P; p++ {
+                       <-done
+               }
+       }
+}
+
 func TestBlockLocked(t *testing.T) {
        const N = 10
        c := make(chan bool)
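
A note on how TestGoroutineParallelism above does its job: goroutine p at
round i busy-waits until x == P*i+p and then stores P*i+p+1, so with P=4 the
counter must advance 0 -> 1 -> ... -> 12 with each goroutine taking every
fourth step. Because the waiters spin without yielding (the scheduler of the
time is non-preemptive), the counter can only advance if all P goroutines
are genuinely running at the same time; if the scheduler strands any of
them, the test hangs until the test timeout rather than failing an
assertion.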