]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: account for _Pgcstop in GC CPU pause time in a fine-grained way
authorMichael Anthony Knyszek <mknyszek@google.com>
Mon, 25 Mar 2024 17:50:13 +0000 (17:50 +0000)
committerGopher Robot <gobot@golang.org>
Mon, 8 Apr 2024 21:43:16 +0000 (21:43 +0000)
The previous CL, CL 570257, made it so that STW time no longer
overlapped with other CPU time tracking. However, what we lost was
insight into the CPU time spent _stopping_ the world, which can be just
as important. There's pretty much no easy way to measure this
indirectly, so this CL implements a direct measurement: whenever a P
enters _Pgcstop, it writes down what time it did so. stopTheWorld then
accumulates all the time deltas between when it finished stopping the
world and each P's stop time into a total additional pause time. The GC
pause cases then accumulate this number into the metrics.

This should cause minimal additional overhead in stopping the world. GC
STWs already take on the order of 10s to 100s of microseconds. Even for
100 Ps, the extra `nanotime` call per P is only 1500ns of additional CPU
time. This is likely to be much less in actual pause latency, since it
all happens concurrently.

Change-Id: Icf190ffea469cd35ebaf0b2587bf6358648c8554
Reviewed-on: https://go-review.googlesource.com/c/go/+/574215
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nicolas Hillegeer <aktau@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>

src/runtime/mgc.go
src/runtime/proc.go
src/runtime/runtime2.go

index 55ae765657fba83481328592f344dcfe875cb9ab..3d3ecb0f883044723e14dd58210889ed939d8cb6 100644 (file)
@@ -682,6 +682,10 @@ func gcStart(trigger gcTrigger) {
        systemstack(func() {
                stw = stopTheWorldWithSema(stwGCSweepTerm)
        })
+
+       // Accumulate fine-grained stopping time.
+       work.cpuStats.accumulateGCPauseTime(stw.stoppingCPUTime, 1)
+
        // Finish sweep before we start concurrent scan.
        systemstack(func() {
                finishsweep_m()
@@ -872,6 +876,9 @@ top:
        // below. The important thing is that the wb remains active until
        // all marking is complete. This includes writes made by the GC.
 
+       // Accumulate fine-grained stopping time.
+       work.cpuStats.accumulateGCPauseTime(stw.stoppingCPUTime, 1)
+
        // There is sometimes work left over when we enter mark termination due
        // to write barriers performed after the completion barrier above.
        // Detect this and resume concurrent mark. This is obviously
@@ -894,6 +901,10 @@ top:
        if restart {
                getg().m.preemptoff = ""
                systemstack(func() {
+                       // Accumulate the time we were stopped before we had to start again.
+                       work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs)
+
+                       // Start the world again.
                        now := startTheWorldWithSema(0, stw)
                        work.pauseNS += now - stw.startedStopping
                })
index 1aad8e3c63157717eea38c8e5861ba3cd4cecf93..6a3c786e36a080eb1830109e073b563138a6f543 100644 (file)
@@ -1325,6 +1325,7 @@ type worldStop struct {
        reason           stwReason
        startedStopping  int64
        finishedStopping int64
+       stoppingCPUTime  int64
 }
 
 // Temporary variable for stopTheWorld, when it can't write to the stack.
@@ -1480,6 +1481,7 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
        preemptall()
        // stop current P
        gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
+       gp.m.p.ptr().gcStopTime = start
        sched.stopwait--
        // try to retake all P's in Psyscall status
        trace = traceAcquire()
@@ -1491,6 +1493,7 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
                                trace.ProcSteal(pp, false)
                        }
                        pp.syscalltick++
+                       pp.gcStopTime = nanotime()
                        sched.stopwait--
                }
        }
@@ -1506,6 +1509,7 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
                        break
                }
                pp.status = _Pgcstop
+               pp.gcStopTime = nanotime()
                sched.stopwait--
        }
        wait := sched.stopwait > 0
@@ -1531,7 +1535,11 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
                sched.stwStoppingTimeOther.record(startTime)
        }
 
-       // sanity checks
+       // Double-check we actually stopped everything, and all the invariants hold.
+       // Also accumulate all the time spent by each P in _Pgcstop up to the point
+       // where everything was stopped. This will be accumulated into the total pause
+       // CPU time by the caller.
+       stoppingCPUTime := int64(0)
        bad := ""
        if sched.stopwait != 0 {
                bad = "stopTheWorld: not stopped (stopwait != 0)"
@@ -1540,6 +1548,11 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
                        if pp.status != _Pgcstop {
                                bad = "stopTheWorld: not stopped (status != _Pgcstop)"
                        }
+                       if pp.gcStopTime == 0 && bad == "" {
+                               bad = "stopTheWorld: broken CPU time accounting"
+                       }
+                       stoppingCPUTime += finish - pp.gcStopTime
+                       pp.gcStopTime = 0
                }
        }
        if freezing.Load() {
@@ -1556,7 +1569,12 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
 
        worldStopped()
 
-       return worldStop{reason: reason, startedStopping: start, finishedStopping: finish}
+       return worldStop{
+               reason:           reason,
+               startedStopping:  start,
+               finishedStopping: finish,
+               stoppingCPUTime:  stoppingCPUTime,
+       }
 }
 
 // reason is the same STW reason passed to stopTheWorld. start is the start
@@ -2945,6 +2963,7 @@ func handoffp(pp *p) {
        lock(&sched.lock)
        if sched.gcwaiting.Load() {
                pp.status = _Pgcstop
+               pp.gcStopTime = nanotime()
                sched.stopwait--
                if sched.stopwait == 0 {
                        notewakeup(&sched.stopnote)
@@ -3087,6 +3106,7 @@ func gcstopm() {
        pp := releasep()
        lock(&sched.lock)
        pp.status = _Pgcstop
+       pp.gcStopTime = nanotime()
        sched.stopwait--
        if sched.stopwait == 0 {
                notewakeup(&sched.stopnote)
@@ -4404,6 +4424,7 @@ func entersyscall_gcwait() {
                        }
                        traceRelease(trace)
                }
+               pp.gcStopTime = nanotime()
                pp.syscalltick++
                if sched.stopwait--; sched.stopwait == 0 {
                        notewakeup(&sched.stopnote)
index 4a5dbf1cc82c8876753d0fbeb5fc34247a06d241..83252abb441a28f4936b6eebac9ea7dd570a4d72 100644 (file)
@@ -763,6 +763,9 @@ type p struct {
        // scheduler ASAP (regardless of what G is running on it).
        preempt bool
 
+       // gcStopTime is the nanotime timestamp that this P last entered _Pgcstop.
+       gcStopTime int64
+
        // pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
        //
        // Used only if GOEXPERIMENT=pagetrace.