]> Cypherpunks repositories - gostls13.git/commitdiff
runtime/metrics: add metrics for goroutine sched states
authorMichael Anthony Knyszek <mknyszek@google.com>
Wed, 23 Jul 2025 17:35:54 +0000 (17:35 +0000)
committerGopher Robot <gobot@golang.org>
Fri, 15 Aug 2025 18:31:00 +0000 (11:31 -0700)
This is largely a port of CL 38180.

For #15490.

Change-Id: I2726111e472e81e9f9f0f294df97872c2689f061
Reviewed-on: https://go-review.googlesource.com/c/go/+/690397
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/runtime/debug.go
src/runtime/metrics.go
src/runtime/metrics/description.go
src/runtime/metrics/doc.go
src/runtime/metrics_test.go
src/runtime/mprof.go
src/runtime/pipe_unix_test.go [new file with mode: 0644]
src/runtime/pipe_windows_test.go [new file with mode: 0644]
src/runtime/proc.go
src/runtime/runtime2.go

index dacadd2721ffa27079dc016ef4b127d1d438eb1b..56b766f11fa37dc906b6e2e5d44dbe4f59d6d913 100644 (file)
@@ -177,7 +177,7 @@ func totalMutexWaitTimeNanos() int64 {
 
 // NumGoroutine returns the number of goroutines that currently exist.
 func NumGoroutine() int {
-       return int(gcount())
+       return int(gcount(false))
 }
 
 //go:linkname debug_modinfo runtime/debug.modinfo
index ef3782b783fc326be63f993948c97b5363629449..47b1b891e140e13274f837bbebe2fc93ce4ba689 100644 (file)
@@ -8,6 +8,7 @@ package runtime
 
 import (
        "internal/godebugs"
+       "internal/runtime/atomic"
        "internal/runtime/gc"
        "unsafe"
 )
@@ -465,9 +466,38 @@ func initMetrics() {
                        },
                },
                "/sched/goroutines:goroutines": {
-                       compute: func(_ *statAggregate, out *metricValue) {
+                       deps: makeStatDepSet(schedStatsDep),
+                       compute: func(in *statAggregate, out *metricValue) {
+                               out.kind = metricKindUint64
+                               out.scalar = uint64(in.schedStats.gTotal)
+                       },
+               },
+               "/sched/goroutines/not-in-go:goroutines": {
+                       deps: makeStatDepSet(schedStatsDep),
+                       compute: func(in *statAggregate, out *metricValue) {
+                               out.kind = metricKindUint64
+                               out.scalar = uint64(in.schedStats.gNonGo)
+                       },
+               },
+               "/sched/goroutines/running:goroutines": {
+                       deps: makeStatDepSet(schedStatsDep),
+                       compute: func(in *statAggregate, out *metricValue) {
+                               out.kind = metricKindUint64
+                               out.scalar = uint64(in.schedStats.gRunning)
+                       },
+               },
+               "/sched/goroutines/runnable:goroutines": {
+                       deps: makeStatDepSet(schedStatsDep),
+                       compute: func(in *statAggregate, out *metricValue) {
+                               out.kind = metricKindUint64
+                               out.scalar = uint64(in.schedStats.gRunnable)
+                       },
+               },
+               "/sched/goroutines/waiting:goroutines": {
+                       deps: makeStatDepSet(schedStatsDep),
+                       compute: func(in *statAggregate, out *metricValue) {
                                out.kind = metricKindUint64
-                               out.scalar = uint64(gcount())
+                               out.scalar = uint64(in.schedStats.gWaiting)
                        },
                },
                "/sched/latencies:seconds": {
@@ -547,6 +577,7 @@ const (
        cpuStatsDep                  // corresponds to cpuStatsAggregate
        gcStatsDep                   // corresponds to gcStatsAggregate
        finalStatsDep                // corresponds to finalStatsAggregate
+       schedStatsDep                // corresponds to schedStatsAggregate
        numStatsDeps
 )
 
@@ -740,6 +771,80 @@ func (a *finalStatsAggregate) compute() {
        a.cleanupsQueued, a.cleanupsExecuted = gcCleanups.readQueueStats()
 }
 
+// schedStatsAggregate contains stats about the scheduler, including
+// an approximate count of goroutines in each state.
+type schedStatsAggregate struct {
+       gTotal    uint64
+       gRunning  uint64
+       gRunnable uint64
+       gNonGo    uint64
+       gWaiting  uint64
+}
+
+// compute populates the schedStatsAggregate with values from the runtime.
+func (a *schedStatsAggregate) compute() {
+       // Lock the scheduler so the global run queue can't change and
+       // the number of Ps can't change. This doesn't prevent the
+       // local run queues from changing, so the results are still
+       // approximate.
+       lock(&sched.lock)
+
+       // Collect running/runnable from per-P run queues.
+       for _, p := range allp {
+               if p == nil || p.status == _Pdead {
+                       break
+               }
+               switch p.status {
+               case _Prunning:
+                       a.gRunning++
+               case _Psyscall:
+                       a.gNonGo++
+               case _Pgcstop:
+                       // The world is stopping or stopped.
+                       // This is fine. The results will be
+                       // slightly odd since nothing else
+                       // is running, but it will be accurate.
+               }
+
+               for {
+                       h := atomic.Load(&p.runqhead)
+                       t := atomic.Load(&p.runqtail)
+                       next := atomic.Loaduintptr((*uintptr)(&p.runnext))
+                       runnable := int32(t - h)
+                       if atomic.Load(&p.runqhead) != h || runnable < 0 {
+                               continue
+                       }
+                       if next != 0 {
+                               runnable++
+                       }
+                       a.gRunnable += uint64(runnable)
+                       break
+               }
+       }
+
+       // Global run queue.
+       a.gRunnable += uint64(sched.runq.size)
+
+       // Account for Gs that are in _Gsyscall without a P in _Psyscall.
+       nGsyscallNoP := sched.nGsyscallNoP.Load()
+
+       // nGsyscallNoP can go negative during temporary races.
+       if nGsyscallNoP >= 0 {
+               a.gNonGo += uint64(nGsyscallNoP)
+       }
+
+       // Compute the number of blocked goroutines. We have to
+       // include system goroutines in this count because we included
+       // them above.
+       a.gTotal = uint64(gcount(true))
+       a.gWaiting = a.gTotal - (a.gRunning + a.gRunnable + a.gNonGo)
+       if a.gWaiting < 0 {
+               a.gWaiting = 0
+       }
+
+       unlock(&sched.lock)
+}
+
 // nsToSec takes a duration in nanoseconds and converts it to seconds as
 // a float64.
 func nsToSec(ns int64) float64 {
@@ -758,6 +863,7 @@ type statAggregate struct {
        cpuStats   cpuStatsAggregate
        gcStats    gcStatsAggregate
        finalStats finalStatsAggregate
+       schedStats schedStatsAggregate
 }
 
 // ensure populates statistics aggregates determined by deps if they
@@ -782,6 +888,8 @@ func (a *statAggregate) ensure(deps *statDepSet) {
                        a.gcStats.compute()
                case finalStatsDep:
                        a.finalStats.compute()
+               case schedStatsDep:
+                       a.schedStats.compute()
                }
        }
        a.ensured = a.ensured.union(missing)
index 4587f791e144ca15d8ba55f6cfe75076a59fdd90..cf22bb73adc3c2bf2a524ca7efe629ade15ef934 100644 (file)
@@ -437,6 +437,26 @@ var allDesc = []Description{
                Description: "The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously.",
                Kind:        KindUint64,
        },
+       {
+               Name:        "/sched/goroutines/not-in-go:goroutines",
+               Description: "Approximate count of goroutines running or blocked in a system call or cgo call. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
+               Kind:        KindUint64,
+       },
+       {
+               Name:        "/sched/goroutines/runnable:goroutines",
+               Description: "Approximate count of goroutines ready to execute, but not executing. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
+               Kind:        KindUint64,
+       },
+       {
+               Name:        "/sched/goroutines/running:goroutines",
+               Description: "Approximate count of goroutines executing. Always less than or equal to /sched/gomaxprocs:threads. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
+               Kind:        KindUint64,
+       },
+       {
+               Name:        "/sched/goroutines/waiting:goroutines",
+               Description: "Approximate count of goroutines waiting on a resource (I/O or sync primitives). Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
+               Kind:        KindUint64,
+       },
        {
                Name:        "/sched/goroutines:goroutines",
                Description: "Count of live goroutines.",
index 058769ac3ab26af089e686232836eea23ebe7343..c379b201b4b79b91ef801fab77cdb06db228088b 100644 (file)
@@ -509,6 +509,26 @@ Below is the full list of supported metrics, ordered lexicographically.
                operating system threads that can execute user-level Go code
                simultaneously.
 
+       /sched/goroutines/not-in-go:goroutines
+               Approximate count of goroutines running or blocked in
+               a system call or cgo call. Not guaranteed to add up to
+               /sched/goroutines:goroutines with other goroutine metrics.
+
+       /sched/goroutines/runnable:goroutines
+               Approximate count of goroutines ready to execute,
+               but not executing. Not guaranteed to add up to
+               /sched/goroutines:goroutines with other goroutine metrics.
+
+       /sched/goroutines/running:goroutines
+               Approximate count of goroutines executing. Always less than or
+               equal to /sched/gomaxprocs:threads. Not guaranteed to add up to
+               /sched/goroutines:goroutines with other goroutine metrics.
+
+       /sched/goroutines/waiting:goroutines
+               Approximate count of goroutines waiting on a resource
+               (I/O or sync primitives). Not guaranteed to add up to
+               /sched/goroutines:goroutines with other goroutine metrics.
+
        /sched/goroutines:goroutines
                Count of live goroutines.
 
index 5787c96084fbeb968aa4d391750eb94b46818815..5b16cbcb22d92e26cebb9c88448566752ecc655c 100644 (file)
@@ -22,6 +22,7 @@ import (
        "strings"
        "sync"
        "sync/atomic"
+       "syscall"
        "testing"
        "time"
        "unsafe"
@@ -1575,3 +1576,195 @@ func TestReadMetricsFinalizers(t *testing.T) {
                t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
        }
 }
+
+func TestReadMetricsSched(t *testing.T) {
+       const (
+               notInGo = iota
+               runnable
+               running
+               waiting
+       )
+       var s [4]metrics.Sample
+       s[0].Name = "/sched/goroutines/not-in-go:goroutines"
+       s[1].Name = "/sched/goroutines/runnable:goroutines"
+       s[2].Name = "/sched/goroutines/running:goroutines"
+       s[3].Name = "/sched/goroutines/waiting:goroutines"
+
+       logMetrics := func(t *testing.T, s []metrics.Sample) {
+               for i := range s {
+                       t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
+               }
+       }
+
+       // generalSlack is the amount of goroutines we allow ourselves to be
+       // off by in any given category, either due to background system
+       // goroutines or testing package goroutines.
+       const generalSlack = 4
+
+       // waitingSlack is the max number of blocked goroutines left
+       // from other tests, the testing package, or system
+       // goroutines.
+       const waitingSlack = 100
+
+       // Make sure GC isn't running, since GC workers interfere with
+       // expected counts.
+       defer debug.SetGCPercent(debug.SetGCPercent(-1))
+       runtime.GC()
+
+       check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
+               val := s.Value.Uint64()
+               if val < min {
+                       t.Errorf("%s too low; %d < %d", s.Name, val, min)
+               }
+               if val > max {
+                       t.Errorf("%s too high; %d > %d", s.Name, val, max)
+               }
+       }
+       checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
+               check(t, s, value, value)
+       }
+       spinUntil := func(f func() bool, timeout time.Duration) bool {
+               start := time.Now()
+               for time.Since(start) < timeout {
+                       if f() {
+                               return true
+                       }
+                       time.Sleep(time.Millisecond)
+               }
+               return false
+       }
+
+       // Check base values.
+       t.Run("base", func(t *testing.T) {
+               defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+               metrics.Read(s[:])
+               logMetrics(t, s[:])
+               check(t, &s[notInGo], 0, generalSlack)
+               check(t, &s[runnable], 0, generalSlack)
+               checkEq(t, &s[running], 1)
+               check(t, &s[waiting], 0, waitingSlack)
+       })
+
+       // Force Running count to be high. We'll use these goroutines
+       // for Runnable, too.
+       const count = 10
+       var ready, exit atomic.Uint32
+       for i := 0; i < count-1; i++ {
+               go func() {
+                       ready.Add(1)
+                       for exit.Load() == 0 {
+                               // Spin to get us and keep us running, but check
+                               // the exit condition so we exit out early if we're
+                               // done.
+                               start := time.Now()
+                               for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
+                               }
+                               runtime.Gosched()
+                       }
+               }()
+       }
+       for ready.Load() < count-1 {
+               runtime.Gosched()
+       }
+
+       // Be careful. We've entered a dangerous state for platforms
+       // that do not return back to the underlying system unless all
+       // goroutines are blocked, like js/wasm, since we have a bunch
+       // of runnable goroutines all spinning. We cannot write anything
+       // out.
+       if testenv.HasParallelism() {
+               t.Run("running", func(t *testing.T) {
+                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
+                       // It can take a little bit for the scheduler to
+                       // distribute the goroutines to Ps, so retry for a
+                       // while.
+                       spinUntil(func() bool {
+                               metrics.Read(s[:])
+                               return s[running].Value.Uint64() >= count
+                       }, time.Second)
+                       logMetrics(t, s[:])
+                       check(t, &s[running], count, count+4)
+               })
+
+               // Force runnable count to be high.
+               t.Run("runnable", func(t *testing.T) {
+                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+                       metrics.Read(s[:])
+                       logMetrics(t, s[:])
+                       checkEq(t, &s[running], 1)
+                       check(t, &s[runnable], count-1, count+generalSlack)
+               })
+
+               // Done with the running/runnable goroutines.
+               exit.Store(1)
+       } else {
+               // Read metrics and then exit all the other goroutines,
+               // so that system calls may proceed.
+               metrics.Read(s[:])
+
+               // Done with the running/runnable goroutines.
+               exit.Store(1)
+
+               // Now we can check our invariants.
+               t.Run("running", func(t *testing.T) {
+                       logMetrics(t, s[:])
+                       checkEq(t, &s[running], 1)
+               })
+               t.Run("runnable", func(t *testing.T) {
+                       logMetrics(t, s[:])
+                       check(t, &s[runnable], count-1, count+generalSlack)
+               })
+       }
+
+       // Force not-in-go count to be high. This is a little tricky since
+       // we try really hard not to let things block in system calls.
+       // We have to drop to the syscall package to do this reliably.
+       t.Run("not-in-go", func(t *testing.T) {
+               // Block a bunch of goroutines on an OS pipe.
+               pr, pw, err := pipe()
+               if err != nil {
+                       switch runtime.GOOS {
+                       case "js", "wasip1":
+                               t.Skip("creating pipe:", err)
+                       }
+                       t.Fatal("creating pipe:", err)
+               }
+               for i := 0; i < count; i++ {
+                       go syscall.Read(pr, make([]byte, 1))
+               }
+
+               // Let the goroutines block.
+               spinUntil(func() bool {
+                       metrics.Read(s[:])
+                       return s[notInGo].Value.Uint64() >= count
+               }, time.Second)
+
+               metrics.Read(s[:])
+               logMetrics(t, s[:])
+               check(t, &s[notInGo], count, count+generalSlack)
+
+               syscall.Close(pw)
+               syscall.Close(pr)
+       })
+
+       t.Run("waiting", func(t *testing.T) {
+               // Force waiting count to be high.
+               const waitingCount = 1000
+               stop = make(chan bool)
+               for i := 0; i < waitingCount; i++ {
+                       go func() { <-stop }()
+               }
+
+               // Let the goroutines block.
+               spinUntil(func() bool {
+                       metrics.Read(s[:])
+                       return s[waiting].Value.Uint64() >= waitingCount
+               }, time.Second)
+
+               metrics.Read(s[:])
+               logMetrics(t, s[:])
+               check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
+
+               close(stop)
+       })
+}
index b2ff257f65eca2a1c0bfabca8442b3dba41155a7..97b29076523d5dc214a4842d058a0805af38ddc5 100644 (file)
@@ -1308,7 +1308,7 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
                // allocation estimate without bothering to STW. As long as
                // this is close, then we'll only need to STW once (on the next
                // call).
-               return int(gcount()), false
+               return int(gcount(false)), false
        }
 
        semacquire(&goroutineProfile.sema)
@@ -1324,7 +1324,7 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
        // goroutines that can vary between user and system to ensure that the count
        // doesn't change during the collection. So, check the finalizer goroutine
        // and cleanup goroutines in particular.
-       n = int(gcount())
+       n = int(gcount(false))
        if fingStatus.Load()&fingRunningFinalizer != 0 {
                n++
        }
diff --git a/src/runtime/pipe_unix_test.go b/src/runtime/pipe_unix_test.go
new file mode 100644 (file)
index 0000000..82a49df
--- /dev/null
@@ -0,0 +1,15 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+
+package runtime_test
+
+import "syscall"
+
+func pipe() (r, w int, err error) {
+       var p [2]int
+       err = syscall.Pipe(p[:])
+       return p[0], p[1], err
+}
diff --git a/src/runtime/pipe_windows_test.go b/src/runtime/pipe_windows_test.go
new file mode 100644 (file)
index 0000000..ad84ec9
--- /dev/null
@@ -0,0 +1,13 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import "syscall"
+
+func pipe() (r, w syscall.Handle, err error) {
+       var p [2]syscall.Handle
+       err = syscall.Pipe(p[:])
+       return p[0], p[1], err
+}
index 8d5f2fc7932813ab24a56eace97a986cefa41830..15f10f087ec395e71f0f1d50a6a6867981e0fb31 100644 (file)
@@ -1648,6 +1648,7 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
                        if trace.ok() {
                                trace.ProcSteal(pp, false)
                        }
+                       sched.nGsyscallNoP.Add(1)
                        pp.syscalltick++
                        pp.gcStopTime = nanotime()
                        sched.stopwait--
@@ -2174,6 +2175,7 @@ func forEachPInternal(fn func(*p)) {
                                trace.ProcSteal(p2, false)
                                traceRelease(trace)
                        }
+                       sched.nGsyscallNoP.Add(1)
                        p2.syscalltick++
                        handoffp(p2)
                } else if trace.ok() {
@@ -2447,6 +2449,7 @@ func needm(signal bool) {
        // mp.curg is now a real goroutine.
        casgstatus(mp.curg, _Gdead, _Gsyscall)
        sched.ngsys.Add(-1)
+       sched.nGsyscallNoP.Add(1)
 
        if !signal {
                if trace.ok() {
@@ -2582,6 +2585,7 @@ func dropm() {
        casgstatus(mp.curg, _Gsyscall, _Gdead)
        mp.curg.preemptStop = false
        sched.ngsys.Add(1)
+       sched.nGsyscallNoP.Add(-1)
 
        if !mp.isExtraInSig {
                if trace.ok() {
@@ -4675,6 +4679,7 @@ func entersyscall_gcwait() {
                        trace.ProcSteal(pp, true)
                        traceRelease(trace)
                }
+               sched.nGsyscallNoP.Add(1)
                pp.gcStopTime = nanotime()
                pp.syscalltick++
                if sched.stopwait--; sched.stopwait == 0 {
@@ -4707,6 +4712,8 @@ func entersyscallblock() {
        gp.m.syscalltick = gp.m.p.ptr().syscalltick
        gp.m.p.ptr().syscalltick++
 
+       sched.nGsyscallNoP.Add(1)
+
        // Leave SP around for GC and traceback.
        pc := sys.GetCallerPC()
        sp := sys.GetCallerSP()
@@ -4927,6 +4934,7 @@ func exitsyscallfast_pidle() bool {
        }
        unlock(&sched.lock)
        if pp != nil {
+               sched.nGsyscallNoP.Add(-1)
                acquirep(pp)
                return true
        }
@@ -4953,6 +4961,7 @@ func exitsyscall0(gp *g) {
                trace.GoSysExit(true)
                traceRelease(trace)
        }
+       sched.nGsyscallNoP.Add(-1)
        dropg()
        lock(&sched.lock)
        var pp *p
@@ -5528,8 +5537,11 @@ func badunlockosthread() {
        throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
 }
 
-func gcount() int32 {
-       n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size - sched.ngsys.Load()
+func gcount(includeSys bool) int32 {
+       n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size
+       if !includeSys {
+               n -= sched.ngsys.Load()
+       }
        for _, pp := range allp {
                n -= pp.gFree.size
        }
@@ -6404,6 +6416,7 @@ func retake(now int64) uint32 {
                                        trace.ProcSteal(pp, false)
                                        traceRelease(trace)
                                }
+                               sched.nGsyscallNoP.Add(1)
                                n++
                                pp.syscalltick++
                                handoffp(pp)
index b5d2dcefaded996dc6173d0d9a82b141611795d5..c5d15754ec93c70cbed015b269e81bbecb47879b 100644 (file)
@@ -792,7 +792,8 @@ type schedt struct {
        nmsys        int32    // number of system m's not counted for deadlock
        nmfreed      int64    // cumulative number of freed m's
 
-       ngsys atomic.Int32 // number of system goroutines
+       ngsys        atomic.Int32 // number of system goroutines
+       nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P
 
        pidle        puintptr // idle p's
        npidle       atomic.Int32