From: Michael Anthony Knyszek
Date: Fri, 19 May 2023 19:12:35 +0000 (+0000)
Subject: runtime: cache inner pinner on P
X-Git-Tag: go1.21rc1~288
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0adcc5ace8c01d8bf587827e672120d74fbaca36;p=gostls13.git

runtime: cache inner pinner on P

This change caches the *pinner on the P to pool it and reduce the
chance that a new allocation is made. It also makes the *pinner no
longer drop its refs array on unpin, again to avoid reallocating.

The Pinner benchmark results before and after this CL are attached at
the bottom of the commit message. Note that these results are biased
toward the current change because of the last two benchmark changes:
reusing the pinner in the benchmark itself achieves similar performance
even before this change. The benchmark results thus basically just
confirm that this change caches the inner pinner in a useful way. With
the previous benchmarks there is actually a slight regression from the
extra check in the cache; however, the long pole is still setPinned
itself.

name                                  old time/op    new time/op    delta
PinnerPinUnpinBatch-8                   42.2µs ± 2%    41.5µs ± 1%      ~     (p=0.056 n=5+5)
PinnerPinUnpinBatchDouble-8              367µs ± 1%     350µs ± 1%    -4.67%  (p=0.008 n=5+5)
PinnerPinUnpinBatchTiny-8                108µs ± 0%     102µs ± 1%    -6.22%  (p=0.008 n=5+5)
PinnerPinUnpin-8                         592ns ± 8%      40ns ± 1%   -93.29%  (p=0.008 n=5+5)
PinnerPinUnpinTiny-8                     693ns ± 9%      39ns ± 1%   -94.31%  (p=0.008 n=5+5)
PinnerPinUnpinDouble-8                   843ns ± 5%     124ns ± 3%   -85.24%  (p=0.008 n=5+5)
PinnerPinUnpinParallel-8                1.11µs ± 5%    0.00µs ± 0%   -99.55%  (p=0.008 n=5+5)
PinnerPinUnpinParallelTiny-8            1.12µs ± 8%    0.00µs ± 1%   -99.55%  (p=0.008 n=5+5)
PinnerPinUnpinParallelDouble-8          1.79µs ± 4%    0.58µs ± 6%   -67.36%  (p=0.008 n=5+5)
PinnerIsPinnedOnPinned-8                5.78ns ± 0%    5.80ns ± 1%      ~     (p=0.548 n=5+5)
PinnerIsPinnedOnUnpinned-8              4.99ns ± 1%    4.98ns ± 0%      ~     (p=0.841 n=5+5)
PinnerIsPinnedOnPinnedParallel-8        0.71ns ± 0%    0.71ns ± 0%      ~     (p=0.175 n=5+5)
PinnerIsPinnedOnUnpinnedParallel-8      0.67ns ± 1%    0.66ns ± 0%      ~     (p=0.167 n=5+5)

name                                  old alloc/op   new alloc/op   delta
PinnerPinUnpinBatch-8                   20.1kB ± 0%    20.0kB ± 0%    -0.32%  (p=0.008 n=5+5)
PinnerPinUnpinBatchDouble-8             52.7kB ± 0%    52.7kB ± 0%    -0.12%  (p=0.008 n=5+5)
PinnerPinUnpinBatchTiny-8               20.1kB ± 0%    20.0kB ± 0%    -0.32%  (p=0.008 n=5+5)
PinnerPinUnpin-8                         64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinTiny-8                     64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinDouble-8                   64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallel-8                 64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallelTiny-8             64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallelDouble-8           64.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
PinnerIsPinnedOnPinned-8                 0.00B          0.00B           ~     (all equal)
PinnerIsPinnedOnUnpinned-8               0.00B          0.00B           ~     (all equal)
PinnerIsPinnedOnPinnedParallel-8         0.00B          0.00B           ~     (all equal)
PinnerIsPinnedOnUnpinnedParallel-8       0.00B          0.00B           ~     (all equal)

name                                  old allocs/op  new allocs/op  delta
PinnerPinUnpinBatch-8                     9.00 ± 0%      8.00 ± 0%   -11.11%  (p=0.008 n=5+5)
PinnerPinUnpinBatchDouble-8               11.0 ± 0%      10.0 ± 0%    -9.09%  (p=0.008 n=5+5)
PinnerPinUnpinBatchTiny-8                 9.00 ± 0%      8.00 ± 0%   -11.11%  (p=0.008 n=5+5)
PinnerPinUnpin-8                          1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinTiny-8                      1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinDouble-8                    1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallel-8                  1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallelTiny-8              1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerPinUnpinParallelDouble-8            1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
PinnerIsPinnedOnPinned-8                  0.00           0.00           ~     (all equal)
PinnerIsPinnedOnUnpinned-8                0.00           0.00           ~     (all equal)
PinnerIsPinnedOnPinnedParallel-8          0.00           0.00           ~     (all equal)
PinnerIsPinnedOnUnpinnedParallel-8        0.00           0.00           ~     (all equal)

For #46787.

Change-Id: I0cdfad77b189c425868944a4faeff3d5b97417b9
Reviewed-on: https://go-review.googlesource.com/c/go/+/497615
Reviewed-by: Austin Clements
Run-TryBot: Michael Knyszek
Reviewed-by: Ansiwen
TryBot-Result: Gopher Robot
Auto-Submit: Michael Knyszek
---

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index c119308441..c8e68807ee 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1085,6 +1085,9 @@ func gcMarkTermination() {
 	// having pages get stuck on them. These pages are hidden from
 	// the scavenger, so in small idle heaps a significant amount
 	// of additional memory might be held onto.
+	//
+	// Also, flush the pinner cache, to avoid leaking that memory
+	// indefinitely.
 	systemstack(func() {
 		forEachP(func(pp *p) {
 			pp.mcache.prepareForSweep()
@@ -1095,6 +1098,7 @@ func gcMarkTermination() {
 					unlock(&mheap_.lock)
 				})
 			}
+			pp.pinnerCache = nil
 		})
 	})
 	// Now that we've swept stale spans in mcaches, they don't
diff --git a/src/runtime/pinner.go b/src/runtime/pinner.go
index 94c9e92432..2f28db10c0 100644
--- a/src/runtime/pinner.go
+++ b/src/runtime/pinner.go
@@ -27,17 +27,34 @@ type Pinner struct {
 // local variable. If one of these conditions is not met, Pin will panic.
 func (p *Pinner) Pin(pointer any) {
 	if p.pinner == nil {
-		p.pinner = new(pinner)
-		p.refs = p.refStore[:0]
-		SetFinalizer(p.pinner, func(i *pinner) {
-			if len(i.refs) != 0 {
-				i.unpin() // only required to make the test idempotent
-				pinnerLeakPanic()
-			}
-		})
+		// Check the pinner cache first.
+		mp := acquirem()
+		if pp := mp.p.ptr(); pp != nil {
+			p.pinner = pp.pinnerCache
+			pp.pinnerCache = nil
+		}
+		releasem(mp)
+
+		if p.pinner == nil {
+			// Didn't get anything from the pinner cache.
+			p.pinner = new(pinner)
+			p.refs = p.refStore[:0]
+
+			// We set this finalizer once and never clear it. Thus, if the
+			// pinner gets cached, we'll reuse it, along with its finalizer.
+			// This lets us avoid the relatively expensive SetFinalizer call
+			// when reusing from the cache. The finalizer however has to be
+			// resilient to an empty pinner being finalized, which is done
+			// by checking p.refs' length.
+			SetFinalizer(p.pinner, func(i *pinner) {
+				if len(i.refs) != 0 {
+					i.unpin() // only required to make the test idempotent
+					pinnerLeakPanic()
+				}
+			})
+		}
 	}
 	ptr := pinnerGetPtr(&pointer)
-
 	setPinned(ptr, true)
 	p.refs = append(p.refs, ptr)
 }
@@ -45,6 +62,17 @@ func (p *Pinner) Pin(pointer any) {
 // Unpin all pinned objects of the Pinner.
 func (p *Pinner) Unpin() {
 	p.pinner.unpin()
+
+	mp := acquirem()
+	if pp := mp.p.ptr(); pp != nil && pp.pinnerCache == nil {
+		// Put the pinner back in the cache, but only if the
+		// cache is empty. If application code is reusing Pinners
+		// on its own, we want to leave the backing store in place
+		// so reuse is more efficient.
+		pp.pinnerCache = p.pinner
+		p.pinner = nil
+	}
+	releasem(mp)
 }
 
 const (
@@ -63,8 +91,10 @@ func (p *pinner) unpin() {
 	}
 	for i := range p.refs {
 		setPinned(p.refs[i], false)
-		p.refs[i] = nil
 	}
+	// The following two lines make all pointers to references
+	// in p.refs unreachable, either by deleting them or dropping
+	// p.refs' backing store (if it was not backed by refStore).
 	p.refStore = [pinnerRefStoreSize]unsafe.Pointer{}
 	p.refs = p.refStore[:0]
 }
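Aside: a minimal, hypothetical usage sketch (not part of this CL; the
names payload and processOnce are invented) of the pattern the P-local
cache above targets: a fresh Pinner per operation. Before this change
each such operation allocated a new inner pinner and paid a
SetFinalizer call; with the cache, Pin typically takes the inner pinner
from pp.pinnerCache and Unpin donates it back.

package main

import "runtime"

type payload struct {
	buf [64]byte
}

// processOnce pins v for the duration of a single operation, e.g.
// while a pointer into it is handed to code that requires the object
// to stay put.
func processOnce(v *payload) {
	var p runtime.Pinner
	p.Pin(v)      // the first Pin can reuse the P-cached inner pinner
	_ = &v.buf[0] // stand-in for the real work done while pinned
	p.Unpin()     // donates the inner pinner to the P's cache if that slot is free
}

func main() {
	v := &payload{}
	for i := 0; i < 1000; i++ {
		processOnce(v)
	}
}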
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 56518fd3af..886f7bdca9 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -5088,6 +5088,7 @@ func (pp *p) destroy() {
 		pp.sudogbuf[i] = nil
 	}
 	pp.sudogcache = pp.sudogbuf[:0]
+	pp.pinnerCache = nil
 	for j := range pp.deferpoolbuf {
 		pp.deferpoolbuf[j] = nil
 	}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 59271a6001..f4c76abd1c 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -675,6 +675,10 @@ type p struct {
 		buf [128]*mspan
 	}
 
+	// Cache of a single pinner object to reduce allocations from repeated
+	// pinner creation.
+	pinnerCache *pinner
+
 	trace pTraceState
 
 	palloc persistentAlloc // per-P to avoid mutex
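For comparison, a hypothetical sketch (also not part of this CL) of the
explicit-reuse pattern mentioned in the commit message: the application
holds on to one Pinner and reuses it, which the commit message notes
achieved similar performance even before this change. The Unpin logic
above accommodates this case by donating the inner pinner to the P's
cache only when that cache slot is empty.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// One Pinner reused across many batches of pins, rather than a
	// fresh Pinner per batch.
	var pinner runtime.Pinner

	data := make([]*int, 128)
	for i := range data {
		x := i
		data[i] = &x
	}

	for iter := 0; iter < 10; iter++ {
		for _, p := range data {
			pinner.Pin(p) // each Pin records p so Unpin can release it
		}
		pinner.Unpin() // releases every pin taken since the last Unpin
	}
	fmt.Println("pinned and unpinned", len(data), "pointers per pass")
}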