From f3312124c2370c2f64a7f9ad29732ec30209647a Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Mon, 6 Oct 2025 14:38:47 -0400
Subject: [PATCH] runtime: remove batching from spanSPMC free

Added in CL 700496, freeSomeSpanSPMCs attempts to bound tail latency
by processing at most 64 entries at a time, and by returning early if
it notices a preemption request. Both measures exist because we cannot
preempt the function while it holds the lock.

This scheme is based on a similar one in freeSomeWbufs. freeSomeWbufs
has a key difference: all workbufs in its list are unconditionally
freed, so each call always makes forward progress (unless it is
constantly preempted).

In contrast, freeSomeSpanSPMCs only frees "dead" entries. If the list
contains more than 64 live entries, a call may make no progress, and
the caller will simply keep calling in a loop forever, until the next
GC cycle begins, at which point the gcphase check makes it return
early. The infinite loop likely restarts once that cycle's sweep phase
begins.

The queues are used on each P, so it is easy to have 64 permanently
live queues if GOMAXPROCS >= 64. If GOMAXPROCS < 64, it is possible to
transiently have more queues, but spanQueue.drain increases the queue
size in an attempt to reach a steady state of one queue per P.

We must drop work.spanSPMCs.lock to allow preemption, but dropping the
lock allows mutation of the linked list, meaning we cannot simply
continue iteration after retaking the lock. Since there is no
straightforward resolution to this, and we expect the list to
generally contain only around one entry per P, simply remove the
batching and process the entire list without preemption. We may want
to revisit this in the future for very high GOMAXPROCS, or if
applications otherwise regularly create very long lists.

Fixes #75771.

Change-Id: I6a6a636cd3be443aacde5a678c460aa7066b4c4a
Reviewed-on: https://go-review.googlesource.com/c/go/+/709575
Reviewed-by: Michael Knyszek
LUCI-TryBot-Result: Go LUCI
---
 src/runtime/mgc.go                  |  3 +--
 src/runtime/mgcmark_greenteagc.go   | 23 ++++++++++-------------
 src/runtime/mgcmark_nogreenteagc.go |  4 ++--
 src/runtime/mgcsweep.go             |  5 +----
 4 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index b13ec845fc..b4b4485629 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -2046,8 +2046,7 @@ func gcSweep(mode gcMode) bool {
 		prepareFreeWorkbufs()
 		for freeSomeWbufs(false) {
 		}
-		for freeSomeSpanSPMCs(false) {
-		}
+		freeDeadSpanSPMCs()
 		// All "free" events for this mark/sweep cycle have
 		// now happened, so we can make this profile cycle
 		// available immediately.
diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go
index 3975e1e76b..7f8d60349f 100644
--- a/src/runtime/mgcmark_greenteagc.go
+++ b/src/runtime/mgcmark_greenteagc.go
@@ -722,13 +722,8 @@ func (r *spanSPMC) slot(i uint32) *objptr {
 	return (*objptr)(unsafe.Add(unsafe.Pointer(r.ring), idx*unsafe.Sizeof(objptr(0))))
 }
 
-// freeSomeSpanSPMCs frees some spanSPMCs back to the OS and returns
-// true if it should be called again to free more.
-func freeSomeSpanSPMCs(preemptible bool) bool {
-	// TODO(mknyszek): This is arbitrary, but some kind of limit is necessary
-	// to help bound delays to cooperatively preempt ourselves.
-	const batchSize = 64
-
+// freeDeadSpanSPMCs frees dead spanSPMCs back to the OS.
+func freeDeadSpanSPMCs() {
 	// According to the SPMC memory management invariants, we can only free
 	// spanSPMCs outside of the mark phase. We ensure we do this in two ways.
 	//
@@ -740,18 +735,21 @@ func freeSomeSpanSPMCs(preemptible bool) bool {
 	//
 	// This way, we ensure that we don't start freeing if we're in the wrong
 	// phase, and the phase can't change on us while we're freeing.
+	//
+	// TODO(go.dev/issue/75771): Due to the grow semantics in
+	// spanQueue.drain, we expect a steady-state of around one spanSPMC per
+	// P, with some spikes higher when Ps have more than one. For high
+	// GOMAXPROCS, or if this list otherwise gets long, it would be nice to
+	// have a way to batch work that allows preemption during processing.
 	lock(&work.spanSPMCs.lock)
 	if gcphase != _GCoff || work.spanSPMCs.all == nil {
 		unlock(&work.spanSPMCs.lock)
-		return false
+		return
 	}
 	rp := &work.spanSPMCs.all
-	gp := getg()
-	more := true
-	for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
+	for {
 		r := *rp
 		if r == nil {
-			more = false
 			break
 		}
 		if r.dead.Load() {
@@ -766,7 +764,6 @@ func freeSomeSpanSPMCs(preemptible bool) bool {
 		}
 	}
 	unlock(&work.spanSPMCs.lock)
-	return more
 }
 
 // tryStealSpan attempts to steal a span from another P's local queue.
diff --git a/src/runtime/mgcmark_nogreenteagc.go b/src/runtime/mgcmark_nogreenteagc.go
index 9838887f7b..883c3451ab 100644
--- a/src/runtime/mgcmark_nogreenteagc.go
+++ b/src/runtime/mgcmark_nogreenteagc.go
@@ -67,8 +67,8 @@ type spanSPMC struct {
 	_ sys.NotInHeap
 }
 
-func freeSomeSpanSPMCs(preemptible bool) bool {
-	return false
+func freeDeadSpanSPMCs() {
+	return
 }
 
 type objptr uintptr
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 364cdb58cc..c3d6afb90a 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -307,10 +307,7 @@ func bgsweep(c chan int) {
 			// N.B. freeSomeWbufs is already batched internally.
 			goschedIfBusy()
 		}
-		for freeSomeSpanSPMCs(true) {
-			// N.B. freeSomeSpanSPMCs is already batched internally.
-			goschedIfBusy()
-		}
+		freeDeadSpanSPMCs()
 		lock(&sweep.lock)
 		if !isSweepDone() {
 			// This can happen if a GC runs between
-- 
2.52.0
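
To make the failure mode concrete, here is a self-contained Go sketch
of the two schemes. This is not the runtime's code: node, list,
freeSomeNodes, and freeDeadNodes are simplified stand-ins for spanSPMC,
work.spanSPMCs, freeSomeSpanSPMCs, and freeDeadSpanSPMCs, using
sync.Mutex in place of the runtime lock and omitting the preemption
check, the gcphase check, and the actual freeing of ring memory.

package main

import (
	"fmt"
	"sync"
)

type node struct {
	dead bool
	next *node
}

type list struct {
	mu  sync.Mutex
	all *node // head of an intrusive singly-linked list
}

// freeSomeNodes mirrors the shape of the old freeSomeSpanSPMCs: it
// processes at most batchSize entries and reports whether the caller
// should call again. Live entries consume batch slots too, so if the
// first batchSize entries are all live it frees nothing and returns
// true, and a caller looping "for freeSomeNodes(l) {}" never exits.
func freeSomeNodes(l *list) bool {
	const batchSize = 4 // 64 in the runtime
	l.mu.Lock()
	defer l.mu.Unlock()
	rp := &l.all
	for i := 0; i < batchSize; i++ {
		r := *rp
		if r == nil {
			return false // reached the end of the list: no more work
		}
		if r.dead {
			*rp = r.next // unlink; the runtime also frees the entry here
		} else {
			rp = &r.next // live: skip it, consuming a batch slot
		}
	}
	return true // hit the batch limit; caller must call again
}

// freeDeadNodes mirrors the shape of the new freeDeadSpanSPMCs: one
// unbounded pass over the whole list under the lock, no batching.
// Each iteration either unlinks a dead entry or advances past a live
// one, so it always terminates.
func freeDeadNodes(l *list) {
	l.mu.Lock()
	defer l.mu.Unlock()
	for rp := &l.all; *rp != nil; {
		r := *rp
		if r.dead {
			*rp = r.next
		} else {
			rp = &r.next
		}
	}
}

func main() {
	// Build a list of 8 live entries, more than one batch.
	mk := func() *list {
		l := &list{}
		for i := 0; i < 8; i++ {
			l.all = &node{next: l.all}
		}
		return l
	}

	l := mk()
	// Old scheme: with zero dead entries the call frees nothing but
	// still reports more work; a real caller would loop forever.
	fmt.Println("freeSomeNodes says call again:", freeSomeNodes(l))

	l = mk()
	l.all.next.dead = true // mark one entry dead
	freeDeadNodes(l)       // a single unbounded pass frees it
	n := 0
	for r := l.all; r != nil; r = r.next {
		n++
	}
	fmt.Println("entries left after freeDeadNodes:", n)
}

With eight live entries and a batch size of four, freeSomeNodes frees
nothing yet reports that more work remains, which is the shape of the
bgsweep loop that never terminated. freeDeadNodes always finishes in
one pass, at the cost of holding the lock, and thus deferring
preemption, for the length of the entire list.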