From e0dba45c620866bb16cd0db2c51732f03a9b27f3 Mon Sep 17 00:00:00 2001
From: Dmitrii Martynov
Date: Mon, 7 Apr 2025 17:08:19 +0300
Subject: [PATCH] runtime: size field for gQueue and gList

Before this CL, every user of gQueue and gList tracked the size of the
structure in a separate variable. The size was updated manually and
passed around as an extra argument to various functions.

This CL adds a size field to the gQueue and gList structures and moves
the size bookkeeping into the implementation of their API. This reduces
the chance of errors by eliminating the manual size accounting, and it
simplifies several function signatures.

Change-Id: I087da2dfaec4925e4254ad40fce5ccb4c175ec41
Reviewed-on: https://go-review.googlesource.com/c/go/+/664777
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Michael Pratt
Reviewed-by: Keith Randall
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
---
 src/runtime/mgc.go      |   4 +-
 src/runtime/mgcmark.go  |   8 +--
 src/runtime/proc.go     | 155 +++++++++++++++++-----------------------
 src/runtime/runtime2.go |  10 +--
 4 files changed, 72 insertions(+), 105 deletions(-)

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 25345abca9..923cc276b9 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1512,9 +1512,9 @@ func gcBgMarkWorker(ready chan struct{}) {
 				// everything out of the run
 				// queue so it can run
 				// somewhere else.
-				if drainQ, n := runqdrain(pp); n > 0 {
+				if drainQ := runqdrain(pp); !drainQ.empty() {
 					lock(&sched.lock)
-					globrunqputbatch(&drainQ, int32(n))
+					globrunqputbatch(&drainQ)
 					unlock(&sched.lock)
 				}
 			}
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 92ef215ee0..583f79e75d 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -301,16 +301,16 @@ func markrootFreeGStacks() {
 	}
 
 	// Free stacks.
-	q := gQueue{list.head, list.head}
+	var tail *g
 	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
+		tail = gp
 		stackfree(gp.stack)
 		gp.stack.lo = 0
 		gp.stack.hi = 0
-		// Manipulate the queue directly since the Gs are
-		// already all linked the right way.
-		q.tail.set(gp)
 	}
 
+	q := gQueue{list.head, tail.guintptr(), list.size}
+
 	// Put Gs back on the free list.
 	lock(&sched.gFree.lock)
 	sched.gFree.noStack.pushAll(q)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 8f603021e5..8f07b39360 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3058,7 +3058,7 @@ func handoffp(pp *p) {
 	// findrunnable would return a G to run on pp.
 
 	// if it has local work, start it straight away
-	if !runqempty(pp) || sched.runqsize != 0 {
+	if !runqempty(pp) || !sched.runq.empty() {
 		startm(pp, false, false)
 		return
 	}
@@ -3097,7 +3097,7 @@ func handoffp(pp *p) {
 			notewakeup(&sched.safePointNote)
 		}
 	}
-	if sched.runqsize != 0 {
+	if !sched.runq.empty() {
 		unlock(&sched.lock)
 		startm(pp, false, false)
 		return
@@ -3343,7 +3343,7 @@ top:
 	// Check the global runnable queue once in a while to ensure fairness.
 	// Otherwise two goroutines can completely occupy the local runqueue
 	// by constantly respawning each other.
-	if pp.schedtick%61 == 0 && sched.runqsize > 0 {
+	if pp.schedtick%61 == 0 && !sched.runq.empty() {
 		lock(&sched.lock)
 		gp := globrunqget()
 		unlock(&sched.lock)
@@ -3368,12 +3368,12 @@ top:
 	}
 
 	// global runq
-	if sched.runqsize != 0 {
+	if !sched.runq.empty() {
 		lock(&sched.lock)
-		gp, q, qsize := globrunqgetbatch(int32(len(pp.runq)) / 2)
+		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
 		unlock(&sched.lock)
 		if gp != nil {
-			if runqputbatch(pp, &q, qsize); !q.empty() {
+			if runqputbatch(pp, &q); !q.empty() {
 				throw("Couldn't put Gs into empty local runq")
 			}
 			return gp, false, false
@@ -3485,13 +3485,13 @@ top:
 		unlock(&sched.lock)
 		goto top
 	}
-	if sched.runqsize != 0 {
-		gp, q, qsize := globrunqgetbatch(int32(len(pp.runq)) / 2)
+	if !sched.runq.empty() {
+		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
 		unlock(&sched.lock)
 		if gp == nil {
 			throw("global runq empty with non-zero runqsize")
 		}
-		if runqputbatch(pp, &q, qsize); !q.empty() {
+		if runqputbatch(pp, &q); !q.empty() {
 			throw("Couldn't put Gs into empty local runq")
 		}
 		return gp, false, false
@@ -3563,15 +3563,15 @@ top:
 
 	// Check global and P runqueues again.
 	lock(&sched.lock)
-	if sched.runqsize != 0 {
+	if !sched.runq.empty() {
 		pp, _ := pidlegetSpinning(0)
 		if pp != nil {
-			gp, q, qsize := globrunqgetbatch(int32(len(pp.runq)) / 2)
+			gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
 			unlock(&sched.lock)
 			if gp == nil {
 				throw("global runq empty with non-zero runqsize")
 			}
-			if runqputbatch(pp, &q, qsize); !q.empty() {
+			if runqputbatch(pp, &q); !q.empty() {
 				throw("Couldn't put Gs into empty local runq")
 			}
 			acquirep(pp)
@@ -3688,7 +3688,7 @@ top:
 // background work loops, like idle GC. It checks a subset of the
 // conditions checked by the actual scheduler.
 func pollWork() bool {
-	if sched.runqsize != 0 {
+	if !sched.runq.empty() {
 		return true
 	}
 	p := getg().m.p.ptr()
@@ -3935,13 +3935,10 @@ func injectglist(glist *gList) {
 
 	// Mark all the goroutines as runnable before we put them
 	// on the run queues.
-	head := glist.head.ptr()
 	var tail *g
-	qsize := 0
 	trace := traceAcquire()
-	for gp := head; gp != nil; gp = gp.schedlink.ptr() {
+	for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
 		tail = gp
-		qsize++
 		casgstatus(gp, _Gwaiting, _Grunnable)
 		if trace.ok() {
 			trace.GoUnpark(gp, 0)
@@ -3952,13 +3949,11 @@ func injectglist(glist *gList) {
 	}
 
 	// Turn the gList into a gQueue.
-	var q gQueue
-	q.head.set(head)
-	q.tail.set(tail)
+	q := gQueue{glist.head, tail.guintptr(), glist.size}
 	*glist = gList{}
 
-	startIdle := func(n int) {
-		for i := 0; i < n; i++ {
+	startIdle := func(n int32) {
+		for ; n > 0; n-- {
 			mp := acquirem() // See comment in startm.
 			lock(&sched.lock)
@@ -3977,37 +3972,32 @@ func injectglist(glist *gList) {
 
 	pp := getg().m.p.ptr()
 	if pp == nil {
+		n := q.size
 		lock(&sched.lock)
-		globrunqputbatch(&q, int32(qsize))
+		globrunqputbatch(&q)
 		unlock(&sched.lock)
-		startIdle(qsize)
+		startIdle(n)
 		return
 	}
 
-	npidle := int(sched.npidle.Load())
-	var (
-		globq gQueue
-		n     int
-	)
-	for n = 0; n < npidle && !q.empty(); n++ {
+	var globq gQueue
+	npidle := sched.npidle.Load()
+	for ; npidle > 0 && !q.empty(); npidle-- {
 		g := q.pop()
 		globq.pushBack(g)
 	}
-	if n > 0 {
+	if !globq.empty() {
+		n := globq.size
 		lock(&sched.lock)
-		globrunqputbatch(&globq, int32(n))
+		globrunqputbatch(&globq)
 		unlock(&sched.lock)
 		startIdle(n)
-		qsize -= n
 	}
 
-	if !q.empty() {
-		qsize = int(runqputbatch(pp, &q, int32(qsize)))
-		if !q.empty() {
-			lock(&sched.lock)
-			globrunqputbatch(&q, int32(qsize))
-			unlock(&sched.lock)
-		}
+	if runqputbatch(pp, &q); !q.empty() {
+		lock(&sched.lock)
+		globrunqputbatch(&q)
+		unlock(&sched.lock)
 	}
 
 	// Some P's might have become idle after we loaded `sched.npidle`
@@ -4089,7 +4079,6 @@ top:
 		unlock(&sched.lock)
 	} else {
 		sched.disable.runnable.pushBack(gp)
-		sched.disable.n++
 		unlock(&sched.lock)
 		goto top
 	}
@@ -5233,27 +5222,22 @@ func gfput(pp *p, gp *g) {
 	}
 
 	pp.gFree.push(gp)
-	pp.gFree.n++
-	if pp.gFree.n >= 64 {
+	if pp.gFree.size >= 64 {
 		var (
-			inc      int32
 			stackQ   gQueue
 			noStackQ gQueue
 		)
-		for pp.gFree.n >= 32 {
+		for pp.gFree.size >= 32 {
 			gp := pp.gFree.pop()
-			pp.gFree.n--
 			if gp.stack.lo == 0 {
 				noStackQ.push(gp)
 			} else {
 				stackQ.push(gp)
 			}
-			inc++
 		}
 		lock(&sched.gFree.lock)
 		sched.gFree.noStack.pushAll(noStackQ)
 		sched.gFree.stack.pushAll(stackQ)
-		sched.gFree.n += inc
 		unlock(&sched.gFree.lock)
 	}
 }
@@ -5265,7 +5249,7 @@ retry:
 	if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
 		lock(&sched.gFree.lock)
 		// Move a batch of free Gs to the P.
-		for pp.gFree.n < 32 {
+		for pp.gFree.size < 32 {
 			// Prefer Gs with stacks.
 			gp := sched.gFree.stack.pop()
 			if gp == nil {
@@ -5274,9 +5258,7 @@ retry:
 					break
 				}
 			}
-			sched.gFree.n--
 			pp.gFree.push(gp)
-			pp.gFree.n++
 		}
 		unlock(&sched.gFree.lock)
 		goto retry
@@ -5285,7 +5267,6 @@ retry:
 	if gp == nil {
 		return nil
 	}
-	pp.gFree.n--
 	if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) {
 		// Deallocate old stack. We kept it in gfput because it was the
 		// right size when the goroutine was put on the free list, but
@@ -5320,24 +5301,20 @@ retry:
 // Purge all cached G's from gfree list to the global list.
 func gfpurge(pp *p) {
 	var (
-		inc      int32
 		stackQ   gQueue
 		noStackQ gQueue
 	)
 	for !pp.gFree.empty() {
 		gp := pp.gFree.pop()
-		pp.gFree.n--
 		if gp.stack.lo == 0 {
 			noStackQ.push(gp)
 		} else {
 			stackQ.push(gp)
 		}
-		inc++
 	}
 	lock(&sched.gFree.lock)
 	sched.gFree.noStack.pushAll(noStackQ)
 	sched.gFree.stack.pushAll(stackQ)
-	sched.gFree.n += inc
 	unlock(&sched.gFree.lock)
 }
@@ -5453,9 +5430,9 @@ func badunlockosthread() {
 }
 
 func gcount() int32 {
-	n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - sched.ngsys.Load()
+	n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size - sched.ngsys.Load()
 	for _, pp := range allp {
-		n -= pp.gFree.n
+		n -= pp.gFree.size
 	}
 
 	// All these variables can be changed concurrently, so the result can be inconsistent.
@@ -6425,7 +6402,7 @@ func schedtrace(detailed bool) {
 	}
 
 	lock(&sched.lock)
-	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runq.size)
 	if detailed {
 		print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n")
 	}
@@ -6443,7 +6420,7 @@ func schedtrace(detailed bool) {
 			} else {
 				print("nil")
 			}
-			print(" runqsize=", t-h, " gfreecnt=", pp.gFree.n, " timerslen=", len(pp.timers.heap), "\n")
+			print(" runqsize=", t-h, " gfreecnt=", pp.gFree.size, " timerslen=", len(pp.timers.heap), "\n")
 		} else {
 			// In non-detailed mode format lengths of per-P run queues as:
 			// [ len1 len2 len3 len4 ]
@@ -6527,9 +6504,8 @@ func schedEnableUser(enable bool) {
 	}
 	sched.disable.user = !enable
 	if enable {
-		n := sched.disable.n
-		sched.disable.n = 0
-		globrunqputbatch(&sched.disable.runnable, n)
+		n := sched.disable.runnable.size
+		globrunqputbatch(&sched.disable.runnable)
 		unlock(&sched.lock)
 		for ; n != 0 && sched.npidle.Load() != 0; n-- {
 			startm(nil, false, false)
@@ -6591,7 +6567,6 @@ func globrunqput(gp *g) {
 	assertLockHeld(&sched.lock)
 
 	sched.runq.pushBack(gp)
-	sched.runqsize++
 }
 
 // Put gp at the head of the global runnable queue.
@@ -6603,7 +6578,6 @@ func globrunqputhead(gp *g) {
 	assertLockHeld(&sched.lock)
 
 	sched.runq.push(gp)
-	sched.runqsize++
 }
 
 // Put a batch of runnable goroutines on the global runnable queue.
@@ -6612,11 +6586,10 @@ func globrunqputhead(gp *g) {
 // May run during STW, so write barriers are not allowed.
 //
 //go:nowritebarrierrec
-func globrunqputbatch(batch *gQueue, n int32) {
+func globrunqputbatch(batch *gQueue) {
 	assertLockHeld(&sched.lock)
 
 	sched.runq.pushBackAll(*batch)
-	sched.runqsize += n
 	*batch = gQueue{}
 }
@@ -6625,32 +6598,27 @@ func globrunqputbatch(batch *gQueue, n int32) {
 // Try get a batch of G's from the global runnable queue.
 // sched.lock must be held.
 func globrunqget() *g {
 	assertLockHeld(&sched.lock)
 
-	if sched.runqsize == 0 {
+	if sched.runq.size == 0 {
 		return nil
 	}
 
-	sched.runqsize--
-
 	return sched.runq.pop()
 }
 
 // Try get a batch of G's from the global runnable queue.
 // sched.lock must be held.
-func globrunqgetbatch(n int32) (gp *g, q gQueue, qsize int32) {
+func globrunqgetbatch(n int32) (gp *g, q gQueue) {
 	assertLockHeld(&sched.lock)
 
-	if sched.runqsize == 0 {
+	if sched.runq.size == 0 {
 		return
 	}
 
-	n = min(n, sched.runqsize, sched.runqsize/gomaxprocs+1)
-
-	sched.runqsize -= n
+	n = min(n, sched.runq.size, sched.runq.size/gomaxprocs+1)
 
 	gp = sched.runq.pop()
 	n--
 
-	qsize = n
 	for ; n > 0; n-- {
 		gp1 := sched.runq.pop()
 		q.pushBack(gp1)
@@ -6872,23 +6840,22 @@ func runqputslow(pp *p, gp *g, h, t uint32) bool {
 	for i := uint32(0); i < n; i++ {
 		batch[i].schedlink.set(batch[i+1])
 	}
-	var q gQueue
-	q.head.set(batch[0])
-	q.tail.set(batch[n])
+
+	q := gQueue{batch[0].guintptr(), batch[n].guintptr(), int32(n + 1)}
 
 	// Now put the batch on global queue.
 	lock(&sched.lock)
-	globrunqputbatch(&q, int32(n+1))
+	globrunqputbatch(&q)
 	unlock(&sched.lock)
 	return true
 }
 
 // runqputbatch tries to put all the G's on q on the local runnable queue.
-// If the local runq is full the updated size of the input queue will be returned.
+// If the local runq is full the input queue still contains unqueued Gs.
 // Executed only by the owner P.
-func runqputbatch(pp *p, q *gQueue, qsize int32) int32 {
-	if qsize == 0 {
-		return 0
+func runqputbatch(pp *p, q *gQueue) {
+	if q.empty() {
+		return
 	}
 	h := atomic.LoadAcq(&pp.runqhead)
 	t := pp.runqtail
@@ -6899,7 +6866,6 @@ func runqputbatch(pp *p, q *gQueue, qsize int32) int32 {
 		t++
 		n++
 	}
-	qsize -= int32(n)
 
 	if randomizeScheduler {
 		off := func(o uint32) uint32 {
@@ -6913,7 +6879,7 @@ func runqputbatch(pp *p, q *gQueue, qsize int32) int32 {
 
 	atomic.StoreRel(&pp.runqtail, t)
 
-	return qsize
+	return
 }
 
 // Get g from local runnable queue.
@@ -6945,11 +6911,10 @@ func runqget(pp *p) (gp *g, inheritTime bool) {
 
 // runqdrain drains the local runnable queue of pp and returns all goroutines in it.
 // Executed only by the owner P.
-func runqdrain(pp *p) (drainQ gQueue, n uint32) {
+func runqdrain(pp *p) (drainQ gQueue) {
 	oldNext := pp.runnext
 	if oldNext != 0 && pp.runnext.cas(oldNext, 0) {
 		drainQ.pushBack(oldNext.ptr())
-		n++
 	}
 
 retry:
@@ -6977,7 +6942,6 @@ retry:
 		for i := uint32(0); i < qn; i++ {
 			gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
 			drainQ.pushBack(gp)
-			n++
 		}
 		return
 	}
@@ -7065,6 +7029,7 @@ func runqsteal(pp, p2 *p, stealRunNextG bool) *g {
 type gQueue struct {
 	head guintptr
 	tail guintptr
+	size int32
 }
 
 // empty reports whether q is empty.
@@ -7079,6 +7044,7 @@ func (q *gQueue) push(gp *g) {
 	if q.tail == 0 {
 		q.tail.set(gp)
 	}
+	q.size++
 }
 
 // pushBack adds gp to the tail of q.
@@ -7090,6 +7056,7 @@ func (q *gQueue) pushBack(gp *g) {
 		q.head.set(gp)
 	}
 	q.tail.set(gp)
+	q.size++
 }
 
 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must
@@ -7105,6 +7072,7 @@ func (q *gQueue) pushBackAll(q2 gQueue) {
 		q.head = q2.head
 	}
 	q.tail = q2.tail
+	q.size += q2.size
 }
 
 // pop removes and returns the head of queue q. It returns nil if
@@ -7116,13 +7084,14 @@ func (q *gQueue) pop() *g {
 		if q.head == 0 {
 			q.tail = 0
 		}
+		q.size--
 	}
 	return gp
 }
 
 // popList takes all Gs in q and returns them as a gList.
 func (q *gQueue) popList() gList {
-	stack := gList{q.head}
+	stack := gList{q.head, q.size}
 	*q = gQueue{}
 	return stack
 }
@@ -7131,6 +7100,7 @@ func (q *gQueue) popList() gList {
 // on one gQueue or gList at a time.
 type gList struct {
 	head guintptr
+	size int32
 }
 
 // empty reports whether l is empty.
@@ -7142,13 +7112,15 @@ func (l *gList) empty() bool {
 func (l *gList) push(gp *g) {
 	gp.schedlink = l.head
 	l.head.set(gp)
+	l.size++
 }
 
-// pushAll prepends all Gs in q to l.
+// pushAll prepends all Gs in q to l. After this q must not be used.
 func (l *gList) pushAll(q gQueue) {
 	if !q.empty() {
 		q.tail.ptr().schedlink = l.head
 		l.head = q.head
+		l.size += q.size
 	}
 }
@@ -7157,6 +7129,7 @@ func (l *gList) pop() *g {
 	gp := l.head.ptr()
 	if gp != nil {
 		l.head = gp.schedlink
+		l.size--
 	}
 	return gp
 }
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 4318930d9c..e56b45053e 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -671,10 +671,7 @@ type p struct {
 	runnext guintptr
 
 	// Available G's (status == Gdead)
-	gFree struct {
-		gList
-		n int32
-	}
+	gFree gList
 
 	sudogcache []*sudog
 	sudogbuf   [128]*sudog
@@ -785,8 +782,7 @@ type schedt struct {
 	needspinning atomic.Uint32 // See "Delicate dance" comment in proc.go. Boolean. Must hold sched.lock to set to 1.
 
 	// Global runnable queue.
-	runq     gQueue
-	runqsize int32
+	runq gQueue
 
 	// disable controls selective disabling of the scheduler.
 	//
@@ -797,7 +793,6 @@ type schedt struct {
 		// user disables scheduling of user goroutines.
 		user     bool
 		runnable gQueue // pending runnable Gs
-		n        int32  // length of runnable
 	}
 
 	// Global cache of dead G's.
@@ -805,7 +800,6 @@ type schedt struct {
 		lock    mutex
 		stack   gList // Gs with stacks
 		noStack gList // Gs without stacks
-		n       int32
 	}
 
 	// Central cache of sudog structs.
-- 
2.50.0
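
Editor's note: to illustrate the pattern this patch applies, below is a minimal, self-contained Go sketch of an intrusive linked queue that carries its own size field, so callers no longer thread a separate count through the API. The `node` and `queue` types are hypothetical stand-ins for the runtime's `g` and `gQueue`; this is not the runtime code itself.

```go
package main

import "fmt"

// node plays the role of the runtime's g: the link lives in the
// element itself (an intrusive list), so the queue allocates nothing.
type node struct {
	next *node
	id   int
}

// queue mirrors the shape of gQueue after this patch: head, tail, and
// a size maintained by the queue's own methods rather than by callers.
type queue struct {
	head, tail *node
	size       int32
}

func (q *queue) empty() bool { return q.head == nil }

// pushBack adds n to the tail of q and updates size internally.
func (q *queue) pushBack(n *node) {
	n.next = nil
	if q.tail == nil {
		q.head = n
	} else {
		q.tail.next = n
	}
	q.tail = n
	q.size++
}

// pop removes and returns the head of q, or nil if q is empty.
func (q *queue) pop() *node {
	n := q.head
	if n != nil {
		q.head = n.next
		if q.head == nil {
			q.tail = nil
		}
		q.size--
	}
	return n
}

// pushBackAll appends all nodes of q2 to q in O(1) and clears q2,
// analogous to gQueue.pushBackAll followed by *batch = gQueue{}.
func (q *queue) pushBackAll(q2 *queue) {
	if q2.empty() {
		return
	}
	if q.tail == nil {
		q.head = q2.head
	} else {
		q.tail.next = q2.head
	}
	q.tail = q2.tail
	q.size += q2.size
	*q2 = queue{}
}

func main() {
	var q, batch queue
	for i := 0; i < 3; i++ {
		batch.pushBack(&node{id: i})
	}
	// Before this patch the caller would pass the batch size as a
	// separate argument; now the size travels with the queue.
	q.pushBackAll(&batch)
	fmt.Println("size:", q.size) // size: 3
	q.pop()
	fmt.Println("size after pop:", q.size) // size after pop: 2
}
```

The design point, visible in the diff above, is that every place that previously did manual `n++` / `qsize--` bookkeeping (globrunqputbatch, runqputbatch, gfput, injectglist, and friends) now relies on the queue updating its own counter, so size and list contents can no longer drift apart.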