// an M.
top:
+ _p_ := _g_.m.p.ptr()
if sched.gcwaiting != 0 {
gcstopm()
goto top
}
- if _g_.m.p.ptr().runSafePointFn != 0 {
+ if _p_.runSafePointFn != 0 {
runSafePointFn()
}
if fingwait && fingwake {
}
// local runq
- if gp, inheritTime := runqget(_g_.m.p.ptr()); gp != nil {
+ if gp, inheritTime := runqget(_p_); gp != nil {
return gp, inheritTime
}
// global runq
if sched.runqsize != 0 {
lock(&sched.lock)
- gp := globrunqget(_g_.m.p.ptr(), 0)
+ gp := globrunqget(_p_, 0)
unlock(&sched.lock)
if gp != nil {
return gp, false
}
}
+ // Steal work from other P's.
+ procs := uint32(gomaxprocs)
+ if atomic.Load(&sched.npidle) == procs-1 {
+ // Either GOMAXPROCS=1 or everybody, except for us, is idle already.
+ // New work can appear from returning syscall/cgocall, network or timers.
+ // None of those submit to local run queues, so there is no point in stealing.
+ goto stop
+ }
// If number of spinning M's >= number of busy P's, block.
// This is necessary to prevent excessive CPU consumption
// when GOMAXPROCS>>1 but the program parallelism is low.
- if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= uint32(gomaxprocs)-atomic.Load(&sched.npidle) { // TODO: fast atomic
+ if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) { // TODO: fast atomic
goto stop
}
if !_g_.m.spinning {
_g_.m.spinning = true
atomic.Xadd(&sched.nmspinning, 1)
}
- // random steal from other P's
- for i := 0; i < int(4*gomaxprocs); i++ {
- if sched.gcwaiting != 0 {
- goto top
- }
- _p_ := allp[fastrand1()%uint32(gomaxprocs)]
- var gp *g
- if _p_ == _g_.m.p.ptr() {
- gp, _ = runqget(_p_)
- } else {
- stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g
- gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG)
- }
- if gp != nil {
- return gp, false
+ for i := 0; i < 4; i++ {
+ for enum := stealOrder.start(fastrand1()); !enum.done(); enum.next() {
+ if sched.gcwaiting != 0 {
+ goto top
+ }
+ stealRunNextG := i > 2 // first look for ready queues with more than 1 g
+ if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil {
+ return gp, false
+ }
}
}
// We have nothing to do. If we're in the GC mark phase, can
// safely scan and blacken objects, and have work to do, run
// idle-time marking rather than give up the P.
- if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
+ if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
gp := _p_.gcBgMarkWorker.ptr()
casgstatus(gp, _Gwaiting, _Grunnable)
// return P and block
lock(&sched.lock)
- if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 {
+ if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
unlock(&sched.lock)
goto top
}
if sched.runqsize != 0 {
- gp := globrunqget(_g_.m.p.ptr(), 0)
+ gp := globrunqget(_p_, 0)
unlock(&sched.lock)
return gp, false
}
- _p_ := releasep()
+ if releasep() != _p_ {
+ throw("findrunnable: wrong p")
+ }
pidleput(_p_)
unlock(&sched.lock)
runnablePs = p
}
}
+ stealOrder.reset(uint32(nprocs))
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
return runnablePs
func sync_runtime_doSpin() {
procyield(active_spin_cnt)
}
+
+var stealOrder randomOrder
+
+// randomOrder/randomEnum are helper types for randomized work stealing.
+// They allow enumerating all Ps in different pseudo-random orders without repetitions.
+// The algorithm is based on the fact that if X and GOMAXPROCS are coprime, then the
+// sequence (i + X) % GOMAXPROCS visits each value in [0, GOMAXPROCS) exactly once
+// before repeating, which gives the required enumeration.
+type randomOrder struct {
+ count uint32
+ coprimes []uint32
+}
+
+type randomEnum struct {
+ i uint32
+ count uint32
+ pos uint32
+ inc uint32
+}
+
+func (ord *randomOrder) reset(count uint32) {
+ ord.count = count
+ ord.coprimes = ord.coprimes[:0]
+ for i := uint32(1); i <= count; i++ {
+ if gcd(i, count) == 1 {
+ ord.coprimes = append(ord.coprimes, i)
+ }
+ }
+}
+
+func (ord *randomOrder) start(i uint32) randomEnum {
+ return randomEnum{
+ count: ord.count,
+ pos: i % ord.count,
+ inc: ord.coprimes[i%uint32(len(ord.coprimes))],
+ }
+}
+
+func (enum *randomEnum) done() bool {
+ return enum.i == enum.count
+}
+
+func (enum *randomEnum) next() {
+ enum.i++
+ enum.pos = (enum.pos + enum.inc) % enum.count
+}
+
+func (enum *randomEnum) position() uint32 {
+ return enum.pos
+}
+
+func gcd(a, b uint32) uint32 {
+ for b != 0 {
+ a, b = b, a%b
+ }
+ return a
+}
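
For intuition, here is a minimal standalone sketch of the coprime-walk idea (illustrative only, not part of the patch; all names are made up): with count = 6 the coprime increments are {1, 5}, and stepping by either of them from any start position visits every index in [0, 6) exactly once before repeating.

package main

import "fmt"

// gcdDemo mirrors the gcd helper above.
func gcdDemo(a, b uint32) uint32 {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

func main() {
	const count = 6
	var coprimes []uint32
	for i := uint32(1); i <= count; i++ {
		if gcdDemo(i, count) == 1 {
			coprimes = append(coprimes, i)
		}
	}
	fmt.Println("coprimes of 6:", coprimes) // [1 5]

	// Walk with inc = 5 starting at pos = 2: prints 2 1 0 5 4 3,
	// i.e. each of the 6 positions exactly once.
	pos, inc := uint32(2), uint32(5)
	for i := 0; i < count; i++ {
		fmt.Print(pos, " ")
		pos = (pos + inc) % count
	}
	fmt.Println()
}
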
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Proc unit tests. These live in package runtime so they can use the runtime's internals.
+
+package runtime
+
+func RunStealOrderTest() {
+ var ord randomOrder
+ for procs := 1; procs <= 64; procs++ {
+ ord.reset(uint32(procs))
+ if procs >= 3 && len(ord.coprimes) < 2 {
+ panic("too few coprimes")
+ }
+ for co := 0; co < len(ord.coprimes); co++ {
+ enum := ord.start(uint32(co))
+ checked := make([]bool, procs)
+ for p := 0; p < procs; p++ {
+ x := enum.position()
+ if checked[x] {
+ println("procs:", procs, "inc:", enum.inc)
+ panic("duplicate during enumeration")
+ }
+ checked[x] = true
+ enum.next()
+ }
+ if !enum.done() {
+ panic("not done")
+ }
+ }
+ }
+}
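
Assuming the usual runtime export-test pattern, the hook above would be driven from an ordinary external test; a sketch (the test name and file placement are assumptions, not part of this patch):

package runtime_test

import (
	"runtime"
	"testing"
)

// TestStealOrder simply invokes the in-package test hook.
func TestStealOrder(t *testing.T) {
	runtime.RunStealOrderTest()
}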