From: Austin Clements Date: Tue, 24 May 2016 02:14:53 +0000 (-0400) Subject: runtime: pass gcWork to scanstack X-Git-Tag: go1.7beta1~90 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3be48b4dc8c50a6568afcac0113e61a8f6e5224a;p=gostls13.git runtime: pass gcWork to scanstack Currently scanstack obtains its own gcWork from the P for the duration of the stack scan and then, if called during mark termination, disposes the gcWork. However, this means that the number of workbufs allocated will be at least the number of stacks scanned during mark termination, which may be very high (especially during a STW GC). This happens because, in steady state, each scanstack will obtain a fresh workbuf (either from the empty list or by allocating it), fill it with the scan results, and then dispose it to the full list. Nothing is consuming from the full list during this (and hence nothing is recycling them to the empty list), so the length of the full list by the time mark termination starts draining it is at least the number of stacks scanned. Fix this by pushing the gcWork acquisition up the stack to either the gcDrain that calls markroot that calls scanstack (which batches across many stack scans and is the path taken during STW GC) or to newstack (which is still a single scanstack call, but this is roughly bounded by the number of Ps). This fix reduces the workbuf allocation for the test program from issue #15319 from 213 MB (roughly 2KB * 1e5 goroutines) to 10 MB. Fixes #15319. Note that there's potentially a similar issue in write barriers during mark 2. Fixing that will be more difficult since there's no broader non-preemptible context, but it should also be less of a problem since the full list is being drained during mark 2. Some overall improvements in the go1 benchmarks, plus the usual noise. No significant change in the garbage benchmark (time/op or GC memory). name old time/op new time/op delta BinaryTree17-12 2.54s ± 1% 2.51s ± 1% -1.09% (p=0.000 n=20+19) Fannkuch11-12 2.12s ± 0% 2.17s ± 0% +2.18% (p=0.000 n=19+18) FmtFprintfEmpty-12 45.1ns ± 1% 45.2ns ± 0% ~ (p=0.078 n=19+18) FmtFprintfString-12 127ns ± 0% 128ns ± 0% +1.08% (p=0.000 n=19+16) FmtFprintfInt-12 125ns ± 0% 122ns ± 1% -2.71% (p=0.000 n=14+18) FmtFprintfIntInt-12 196ns ± 0% 190ns ± 1% -2.91% (p=0.000 n=12+20) FmtFprintfPrefixedInt-12 196ns ± 0% 194ns ± 1% -0.94% (p=0.000 n=13+18) FmtFprintfFloat-12 253ns ± 1% 251ns ± 1% -0.86% (p=0.000 n=19+20) FmtManyArgs-12 807ns ± 1% 784ns ± 1% -2.85% (p=0.000 n=20+20) GobDecode-12 7.13ms ± 1% 7.12ms ± 1% ~ (p=0.351 n=19+20) GobEncode-12 5.89ms ± 0% 5.95ms ± 0% +0.94% (p=0.000 n=19+19) Gzip-12 219ms ± 1% 221ms ± 1% +1.35% (p=0.000 n=18+20) Gunzip-12 37.5ms ± 1% 37.4ms ± 0% ~ (p=0.057 n=20+19) HTTPClientServer-12 81.4µs ± 4% 81.9µs ± 3% ~ (p=0.118 n=17+18) JSONEncode-12 15.7ms ± 1% 15.8ms ± 1% +0.73% (p=0.000 n=17+18) JSONDecode-12 57.9ms ± 1% 57.2ms ± 1% -1.34% (p=0.000 n=19+19) Mandelbrot200-12 4.12ms ± 1% 4.10ms ± 0% -0.33% (p=0.000 n=19+17) GoParse-12 3.22ms ± 2% 3.25ms ± 1% +0.72% (p=0.000 n=18+20) RegexpMatchEasy0_32-12 70.6ns ± 1% 71.1ns ± 2% +0.63% (p=0.005 n=19+20) RegexpMatchEasy0_1K-12 240ns ± 0% 239ns ± 1% -0.59% (p=0.000 n=19+20) RegexpMatchEasy1_32-12 71.3ns ± 1% 71.3ns ± 1% ~ (p=0.844 n=17+17) RegexpMatchEasy1_1K-12 384ns ± 2% 371ns ± 1% -3.45% (p=0.000 n=19+20) RegexpMatchMedium_32-12 109ns ± 1% 108ns ± 2% -0.48% (p=0.029 n=19+19) RegexpMatchMedium_1K-12 34.3µs ± 1% 34.5µs ± 2% ~ (p=0.160 n=18+20) RegexpMatchHard_32-12 1.79µs ± 9% 1.72µs ± 2% -3.83% (p=0.000 n=19+19) RegexpMatchHard_1K-12 53.3µs ± 4% 51.8µs ± 1% -2.82% (p=0.000 n=19+20) Revcomp-12 386ms ± 0% 388ms ± 0% +0.72% (p=0.000 n=17+20) Template-12 62.9ms ± 1% 62.5ms ± 1% -0.57% (p=0.010 n=18+19) TimeParse-12 325ns ± 0% 331ns ± 0% +1.84% (p=0.000 n=18+19) TimeFormat-12 338ns ± 0% 343ns ± 0% +1.34% (p=0.000 n=18+20) [Geo mean] 52.7µs 52.5µs -0.42% Change-Id: Ib2d34736c4ae2ec329605b0fbc44636038d8d018 Reviewed-on: https://go-review.googlesource.com/23391 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Rick Hudson --- diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index b0e7477d50..2d0cbd203c 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -231,7 +231,7 @@ func markroot(gcw *gcWork, i uint32) { // we scan the stacks we can and ask running // goroutines to scan themselves; and the // second blocks. - scang(gp) + scang(gp, gcw) if selfScan { casgstatus(userG, _Gwaiting, _Grunning) @@ -653,7 +653,7 @@ func gcFlushBgCredit(scanWork int64) { // //go:nowritebarrier //go:systemstack -func scanstack(gp *g) { +func scanstack(gp *g, gcw *gcWork) { if gp.gcscanvalid { return } @@ -742,7 +742,6 @@ func scanstack(gp *g) { // Scan the stack. var cache pcvalueCache - gcw := &getg().m.p.ptr().gcw n := 0 scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { scanframeworker(frame, &cache, gcw) @@ -770,9 +769,6 @@ func scanstack(gp *g) { } gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) tracebackdefers(gp, scanframe, nil) - if gcphase == _GCmarktermination { - gcw.dispose() - } gcUnlockStackBarriers(gp) if gcphase == _GCmark { // gp may have added itself to the rescan list between diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 3a37fa947b..8f98cfa8a4 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -792,7 +792,7 @@ func casgcopystack(gp *g) uint32 { // scang blocks until gp's stack has been scanned. // It might be scanned by scang or it might be scanned by the goroutine itself. // Either way, the stack scan has completed when scang returns. -func scang(gp *g) { +func scang(gp *g, gcw *gcWork) { // Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone. // Nothing is racing with us now, but gcscandone might be set to true left over // from an earlier round of stack scanning (we scan twice per GC). @@ -833,7 +833,7 @@ loop: // the goroutine until we're done. if castogscanstatus(gp, s, s|_Gscan) { if !gp.gcscandone { - scanstack(gp) + scanstack(gp, gcw) gp.gcscandone = true } restartg(gp) diff --git a/src/runtime/stack.go b/src/runtime/stack.go index f68c513fd6..33d29f19a8 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1010,7 +1010,13 @@ func newstack() { // return. } if !gp.gcscandone { - scanstack(gp) + // gcw is safe because we're on the + // system stack. + gcw := &gp.m.p.ptr().gcw + scanstack(gp, gcw) + if gcBlackenPromptly { + gcw.dispose() + } gp.gcscandone = true } gp.preemptscan = false