From: Dmitriy Vyukov
Date: Thu, 11 Jul 2013 19:57:36 +0000 (-0400)
Subject: runtime: fix CPU underutilization
X-Git-Tag: go1.2rc2~1085
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=32fef9908a6dcd523ae44766717662764e6c14ee;p=gostls13.git

runtime: fix CPU underutilization

runtime.newproc/ready are deliberately sloppy about waking new M's:
they only ensure that there is at least 1 spinning M.
Currently, to compensate for that, schedule() checks whether the current P
has local work while there are no spinning M's and, if so, wakes up another one.
This does not work if goroutines do not call schedule().
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require knowing the exact number of runnable G's,
but that is too expensive to maintain.

Fixes #5586.

This is a reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:

	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
		runtime·lock(&runtime·sched);
		gp = globrunqget(m->p, 1);
		runtime·unlock(&runtime·sched);
	}

If the M gets gp from the global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044
---
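The failure mode described above, goroutines that stay runnable but never
re-enter the scheduler, can be reproduced with a small standalone program
along the lines of the TestGoroutineParallelism test added below. The sketch
is illustrative only and not part of the CL; its file name and output are
made up:

	// repro_underutilization.go (hypothetical): each goroutine busy-waits
	// for its turn without blocking or yielding, so the program only makes
	// progress if all P goroutines run on their own P's at the same time.
	package main

	import (
		"fmt"
		"runtime"
		"sync/atomic"
	)

	func main() {
		const P = 4
		runtime.GOMAXPROCS(P)
		done := make(chan bool)
		var x uint32
		for p := 0; p < P; p++ {
			go func(p int) {
				for i := 0; i < 3; i++ {
					// Spin until the shared counter reaches this
					// goroutine's slot, then advance it.
					expected := uint32(P*i + p)
					for atomic.LoadUint32(&x) != expected {
					}
					atomic.StoreUint32(&x, expected+1)
				}
				done <- true
			}(p)
		}
		for p := 0; p < P; p++ {
			<-done
		}
		fmt.Println("all", P, "goroutines were scheduled in parallel")
	}

Before this change the busy-waiting goroutines could end up sharing fewer
than GOMAXPROCS P's, because nothing woke the idle P's once the goroutines
stopped entering the scheduler. With a spinning M now waking another M via
resetspinning()/wakep() when it finds work, all P goroutines get scheduled
in parallel and the program finishes.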
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index cddbefc0f4..44741a66e8 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -1133,6 +1133,25 @@ stop:
 	goto top;
 }
 
+static void
+resetspinning(void)
+{
+	int32 nmspinning;
+
+	if(m->spinning) {
+		m->spinning = false;
+		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
+		if(nmspinning < 0)
+			runtime·throw("findrunnable: negative nmspinning");
+	} else
+		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
+
+	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+	// so see if we need to wakeup another P here.
+	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
+		wakep();
+}
+
 // Injects the list of runnable G's into the scheduler.
 // Can run concurrently with GC.
 static void
@@ -1184,28 +1203,22 @@ top:
 		runtime·lock(&runtime·sched);
 		gp = globrunqget(m->p, 1);
 		runtime·unlock(&runtime·sched);
+		if(gp)
+			resetspinning();
 	}
 	if(gp == nil) {
 		gp = runqget(m->p);
 		if(gp && m->spinning)
 			runtime·throw("schedule: spinning with local work");
 	}
-	if(gp == nil)
-		gp = findrunnable();
-
-	if(m->spinning) {
-		m->spinning = false;
-		runtime·xadd(&runtime·sched.nmspinning, -1);
+	if(gp == nil) {
+		gp = findrunnable();  // blocks until work is available
+		resetspinning();
 	}
 
-	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
-	// so see if we need to wakeup another M here.
-	if (m->p->runqhead != m->p->runqtail &&
-		runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
-		runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic
-		wakep();
-
 	if(gp->lockedm) {
+		// Hands off own p to the locked m,
+		// then blocks waiting for a new p.
 		startlockedm(gp);
 		goto top;
 	}
diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go
index 7e551d2fd1..8f3b407375 100644
--- a/src/pkg/runtime/proc_test.go
+++ b/src/pkg/runtime/proc_test.go
@@ -93,6 +93,30 @@ func TestYieldLocked(t *testing.T) {
 	<-c
 }
 
+func TestGoroutineParallelism(t *testing.T) {
+	const P = 4
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+	for try := 0; try < 10; try++ {
+		done := make(chan bool)
+		x := uint32(0)
+		for p := 0; p < P; p++ {
+			// Test that all P goroutines are scheduled at the same time
+			go func(p int) {
+				for i := 0; i < 3; i++ {
+					expected := uint32(P*i + p)
+					for atomic.LoadUint32(&x) != expected {
+					}
+					atomic.StoreUint32(&x, expected+1)
+				}
+				done <- true
+			}(p)
+		}
+		for p := 0; p < P; p++ {
+			<-done
+		}
+	}
+}
+
 func TestBlockLocked(t *testing.T) {
 	const N = 10
 	c := make(chan bool)
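As a usage note (not part of the CL): the new test can be run on its own with
the go tool, for example: go test -run GoroutineParallelism runtime. Design-wise,
the wakeup stays deliberately conservative: resetspinning() calls wakep() only
when no spinning M's remain and at least one P is idle, and it now runs both when
a spinning M takes a G from the global runq (the path that carried the cl/9776044
bug) and after findrunnable() returns, so m->spinning can no longer be left set.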