]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: fix usage of stale "now" value for netpolling Ms
authorMichael Anthony Knyszek <mknyszek@google.com>
Thu, 18 May 2023 15:51:57 +0000 (15:51 +0000)
committerMichael Knyszek <mknyszek@google.com>
Tue, 23 May 2023 19:24:33 +0000 (19:24 +0000)
Currently pidleget gets passed "now" from before the M goes into
netpoll, resulting in incorrect accounting of idle CPU time.
lastpoll is also stored with a stale "now": the mistake was added in the
same CL it was added for pidleget.

Recompute "now" after returning from netpoll.

Also, start tracking idle time on js/wasm at all.

Credit to Rhys Hiltner for the test case.

Fixes #60276.

Change-Id: I5dd677471f74c915dfcf3d01621430876c3ff307
Reviewed-on: https://go-review.googlesource.com/c/go/+/496183
Reviewed-by: Michael Pratt <mpratt@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
src/runtime/lock_js.go
src/runtime/metrics_test.go
src/runtime/proc.go

index fd2abee7c448848e9dea9a99c132c278f7e982a7..91ad7be317398f3bb26a7e3e26513d4df5799caa 100644 (file)
@@ -242,7 +242,10 @@ func beforeIdle(now, pollUntil int64) (gp *g, otherReady bool) {
        return nil, false
 }
 
+var idleStart int64
+
 func handleAsyncEvent() {
+       idleStart = nanotime()
        pause(getcallersp() - 16)
 }
 
@@ -271,6 +274,8 @@ func clearTimeoutEvent(id int32)
 // When no other goroutine is awake any more, beforeIdle resumes the handler goroutine. Now that the same goroutine
 // is running as was running when the call came in from JavaScript, execution can be safely passed back to JavaScript.
 func handleEvent() {
+       sched.idleTime.Add(nanotime() - idleStart)
+
        e := &event{
                gp:       getg(),
                returned: false,
@@ -290,6 +295,7 @@ func handleEvent() {
        events = events[:len(events)-1]
 
        // return execution to JavaScript
+       idleStart = nanotime()
        pause(getcallersp() - 16)
 }
 
index 4b58914032ad803cda27f8205140db084f2d23bf..b7262cb1aded46864eabb1ae3e5ba2860a489a37 100644 (file)
@@ -636,3 +636,62 @@ func generateMutexWaitTime(mu locker2) time.Duration {
        *runtime.CasGStatusAlwaysTrack = false
        return blockTime
 }
+
+// See issue #60276.
+func TestCPUMetricsSleep(t *testing.T) {
+       if runtime.GOOS == "wasip1" {
+               // Since wasip1 busy-waits in the scheduler, there's no meaningful idle
+               // time. This is accurately reflected in the metrics, but it means this
+               // test is basically meaningless on this platform.
+               t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
+       }
+       names := []string{
+               "/cpu/classes/idle:cpu-seconds",
+
+               "/cpu/classes/gc/mark/assist:cpu-seconds",
+               "/cpu/classes/gc/mark/dedicated:cpu-seconds",
+               "/cpu/classes/gc/mark/idle:cpu-seconds",
+               "/cpu/classes/gc/pause:cpu-seconds",
+               "/cpu/classes/gc/total:cpu-seconds",
+               "/cpu/classes/scavenge/assist:cpu-seconds",
+               "/cpu/classes/scavenge/background:cpu-seconds",
+               "/cpu/classes/scavenge/total:cpu-seconds",
+               "/cpu/classes/total:cpu-seconds",
+               "/cpu/classes/user:cpu-seconds",
+       }
+       prep := func() []metrics.Sample {
+               mm := make([]metrics.Sample, len(names))
+               for i := range names {
+                       mm[i].Name = names[i]
+               }
+               return mm
+       }
+       m1, m2 := prep(), prep()
+
+       // Read 1.
+       runtime.GC() // Update /cpu/classes metrics.
+       metrics.Read(m1)
+
+       // Sleep.
+       const dur = 100 * time.Millisecond
+       time.Sleep(dur)
+
+       // Read 2.
+       runtime.GC() // Update /cpu/classes metrics.
+       metrics.Read(m2)
+
+       // If the bug we expect is happening, then the Sleep CPU time will be accounted for
+       // as user time rather than idle time. Because we're doing this on one core, the
+       // maximum amount of time that can be attributed to user time is the time spent asleep.
+       minIdleCPUSeconds := dur.Seconds() * float64(runtime.GOMAXPROCS(-1))
+
+       if dt := m2[0].Value.Float64() - m1[0].Value.Float64(); dt < minIdleCPUSeconds {
+               for i := range names {
+                       if m1[i].Value.Kind() == metrics.KindBad {
+                               continue
+                       }
+                       t.Logf("%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
+               }
+               t.Errorf(`time.Sleep did not contribute enough to "idle" class: %.5fs < %.5fs`, dt, minIdleCPUSeconds)
+       }
+}
index 276d7355e9cc3f754c79dc12e5e175278ae9d58a..56518fd3af6da495dd1ae1fef899685dc49bba64 100644 (file)
@@ -3145,10 +3145,11 @@ top:
                if mp.spinning {
                        throw("findrunnable: netpoll with spinning")
                }
-               // Refresh now.
-               now = nanotime()
                delay := int64(-1)
                if pollUntil != 0 {
+                       if now == 0 {
+                               now = nanotime()
+                       }
                        delay = pollUntil - now
                        if delay < 0 {
                                delay = 0
@@ -3159,6 +3160,8 @@ top:
                        delay = 0
                }
                list := netpoll(delay) // block until new work is available
+               // Refresh now again, after potentially blocking.
+               now = nanotime()
                sched.pollUntil.Store(0)
                sched.lastpoll.Store(now)
                if faketime != 0 && list.empty() {