From ce9a4afa6f931c1ab832b4c92d3d6768fbd2ab7a Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 22 Dec 2014 18:14:00 +0300 Subject: [PATCH] runtime: simplify procresize Currently we do very a complex rebalancing of runnable goroutines between queues, which tries to preserve scheduling fairness. Besides being complex and error-prone, it also destroys all locality of scheduling. This change uses simpler scheme: leave runnable goroutines where they are, during starttheworld start all Ps with local work, plus start one additional P in case we have excessive runnable goroutines in local queues or in the global queue. The schedler must be able to operate efficiently w/o the rebalancing, because garbage collections do not have to happen frequently. The immediate need is execution tracing support: handling of garabage collection which does stoptheworld/starttheworld several times becomes exceedingly complex if the current execution can jump between Ps during starttheworld. Change-Id: I4fdb7a6d80ca4bd08900d0c6a0a252a95b1a2c90 Reviewed-on: https://go-review.googlesource.com/1951 Reviewed-by: Rick Hudson --- src/runtime/proc1.go | 112 +++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 64 deletions(-) diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go index 658a6d568e..220f463f4b 100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@ -132,7 +132,9 @@ func schedinit() { } procs = n } - procresize(int32(procs)) + if procresize(int32(procs)) != nil { + gothrow("unknown runnable goroutine during bootstrap") + } if buildVersion == "" { // Condition should never trigger. This code just serves @@ -651,30 +653,14 @@ func starttheworld() { injectglist(gp) add := needaddgcproc() lock(&sched.lock) + + procs := gomaxprocs if newprocs != 0 { - procresize(newprocs) + procs = newprocs newprocs = 0 - } else { - procresize(gomaxprocs) } + p1 := procresize(procs) sched.gcwaiting = 0 - - var p1 *p - for { - p := pidleget() - if p == nil { - break - } - // procresize() puts p's with work at the beginning of the list. - // Once we reach a p without a run queue, the rest don't have one either. - if p.runqhead == p.runqtail { - pidleput(p) - break - } - p.m = mget() - p.link = p1 - p1 = p - } if sched.sysmonwait != 0 { sched.sysmonwait = 0 notewakeup(&sched.sysmonnote) @@ -699,6 +685,13 @@ func starttheworld() { } } + // Wakeup an additional proc in case we have excessive runnable goroutines + // in local queues or in the global queue. If we don't, the proc will park itself. + // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. + if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { + wakep() + } + if add { // If GC could have used another helper proc, start one now, // in the hope that it will be available next time. @@ -2383,7 +2376,8 @@ func setcpuprofilerate_m(hz int32) { // Change number of processors. The world is stopped, sched is locked. // gcworkbufs are not being modified by either the GC or // the write barrier code. -func procresize(new int32) { +// Returns list of Ps with local work, they need to be scheduled by the caller. +func procresize(new int32) *p { old := gomaxprocs if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs { gothrow("procresize: invalid arg") @@ -2410,19 +2404,11 @@ func procresize(new int32) { } } - // redistribute runnable G's evenly - // collect all runnable goroutines in global queue preserving FIFO order - // FIFO order is required to ensure fairness even during frequent GCs - // see http://golang.org/issue/7126 - empty := false - for !empty { - empty = true - for i := int32(0); i < old; i++ { - p := allp[i] - if p.runqhead == p.runqtail { - continue - } - empty = false + // free unused P's + for i := new; i < old; i++ { + p := allp[i] + // move all runable goroutines to the global queue + for p.runqhead != p.runqtail { // pop from tail of local queue p.runqtail-- gp := p.runq[p.runqtail%uint32(len(p.runq))] @@ -2434,25 +2420,6 @@ func procresize(new int32) { } sched.runqsize++ } - } - - // fill local queues with at most len(p.runq)/2 goroutines - // start at 1 because current M already executes some G and will acquire allp[0] below, - // so if we have a spare G we want to put it into allp[1]. - var _p_ p - for i := int32(1); i < new*int32(len(_p_.runq))/2 && sched.runqsize > 0; i++ { - gp := sched.runqhead - sched.runqhead = gp.schedlink - if sched.runqhead == nil { - sched.runqtail = nil - } - sched.runqsize-- - runqput(allp[i%new], gp) - } - - // free unused P's - for i := new; i < old; i++ { - p := allp[i] freemcache(p.mcache) p.mcache = nil gfpurge(p) @@ -2461,22 +2428,39 @@ func procresize(new int32) { } _g_ := getg() - if _g_.m.p != nil { - _g_.m.p.m = nil + if _g_.m.p != nil && _g_.m.p.id < new { + // continue to use the current P + _g_.m.p.status = _Prunning + } else { + // release the current P and acquire allp[0] + if _g_.m.p != nil { + _g_.m.p.m = nil + } + _g_.m.p = nil + _g_.m.mcache = nil + p := allp[0] + p.m = nil + p.status = _Pidle + acquirep(p) } - _g_.m.p = nil - _g_.m.mcache = nil - p := allp[0] - p.m = nil - p.status = _Pidle - acquirep(p) - for i := new - 1; i > 0; i-- { + var runnablePs *p + for i := new - 1; i >= 0; i-- { p := allp[i] + if _g_.m.p == p { + continue + } p.status = _Pidle - pidleput(p) + if p.runqhead == p.runqtail { + pidleput(p) + } else { + p.m = mget() + p.link = runnablePs + runnablePs = p + } } var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(new)) + return runnablePs } // Associate p and the current m. -- 2.48.1