From 78a3968c2c9f2d6e8eb6dc263b4a2517c72d71be Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 24 Jul 2025 21:38:37 +0000 Subject: [PATCH] runtime/metrics: add metric for current Go-owned thread count Fixes #15490. Change-Id: I6ce9edc46398030ff639e22d4ca4adebccdfe1b7 Reviewed-on: https://go-review.googlesource.com/c/go/+/690399 Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 12 ++++++++++++ src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 4 ++++ src/runtime/metrics_test.go | 11 ++++++++++- src/runtime/proc.go | 2 +- src/runtime/runtime2.go | 4 +++- 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 028de6b2e1..36efef39c0 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -532,6 +532,13 @@ func initMetrics() { sched.stwTotalTimeOther.write(out) }, }, + "/sched/threads/total:threads": { + deps: makeStatDepSet(schedStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.schedStats.threads) + }, + }, "/sync/mutex/wait/total:seconds": { compute: func(_ *statAggregate, out *metricValue) { out.kind = metricKindFloat64 @@ -787,6 +794,7 @@ type schedStatsAggregate struct { gNonGo uint64 gWaiting uint64 gCreated uint64 + threads uint64 } // compute populates the schedStatsAggregate with values from the runtime. @@ -797,6 +805,10 @@ func (a *schedStatsAggregate) compute() { // approximate. lock(&sched.lock) + // The total count of threads owned by Go is the number of Ms + // minus extra Ms on the list or in use. + a.threads = uint64(mcount()) - uint64(extraMInUse.Load()) - uint64(extraMLength.Load()) + // Collect running/runnable from per-P run queues. a.gCreated += sched.goroutinesCreated.Load() for _, p := range allp { diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index dd0b485cf8..c8c5bf9888 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -498,6 +498,11 @@ var allDesc = []Description{ Kind: KindFloat64Histogram, Cumulative: true, }, + { + Name: "/sched/threads/total:threads", + Description: "The current count of live threads that are owned by the Go runtime.", + Kind: KindUint64, + }, { Name: "/sync/mutex/wait/total:seconds", Description: "Approximate cumulative time goroutines have spent blocked on a sync.Mutex, sync.RWMutex, or runtime-internal lock. This metric is useful for identifying global changes in lock contention. Collect a mutex or block profile using the runtime/pprof package for more detailed contention data.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 2d3b716a3c..00ce60dde1 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -572,6 +572,10 @@ Below is the full list of supported metrics, ordered lexicographically. /sched/pauses/stopping/other:seconds). Bucket counts increase monotonically. + /sched/threads/total:threads + The current count of live threads that are owned by the Go + runtime. + /sync/mutex/wait/total:seconds Approximate cumulative time goroutines have spent blocked on a sync.Mutex, sync.RWMutex, or runtime-internal lock. This metric diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 77223a37a7..385891d7d9 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -1584,13 +1584,16 @@ func TestReadMetricsSched(t *testing.T) { running waiting created + threads + numSamples ) - var s [5]metrics.Sample + var s [numSamples]metrics.Sample s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines" s[runnable].Name = "/sched/goroutines/runnable:goroutines" s[running].Name = "/sched/goroutines/running:goroutines" s[waiting].Name = "/sched/goroutines/waiting:goroutines" s[created].Name = "/sched/goroutines-created:goroutines" + s[threads].Name = "/sched/threads/total:threads" logMetrics := func(t *testing.T, s []metrics.Sample) { for i := range s { @@ -1608,6 +1611,10 @@ func TestReadMetricsSched(t *testing.T) { // goroutines. const waitingSlack = 100 + // threadsSlack is the maximum number of threads left over + // from other tests and the runtime (sysmon, the template thread, etc.) + const threadsSlack = 20 + // Make sure GC isn't running, since GC workers interfere with // expected counts. defer debug.SetGCPercent(debug.SetGCPercent(-1)) @@ -1694,6 +1701,7 @@ func TestReadMetricsSched(t *testing.T) { }, time.Second) logMetrics(t, s[:]) check(t, &s[running], count, count+4) + check(t, &s[threads], count, count+4+threadsSlack) }) // Force runnable count to be high. @@ -1724,6 +1732,7 @@ func TestReadMetricsSched(t *testing.T) { t.Run("running", func(t *testing.T) { logMetrics(t, s[:]) checkEq(t, &s[running], 1) + checkEq(t, &s[threads], 1) }) t.Run("runnable", func(t *testing.T) { logMetrics(t, s[:]) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 329d1b2624..68647d771f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1007,7 +1007,7 @@ func mcommoninit(mp *m, id int64) { // when it is just in a register or thread-local storage. mp.alllink = allm - // NumCgoCall() and others iterate over allm w/o schedlock, + // NumCgoCall and others iterate over allm w/o schedlock, // so we need to publish it safely. atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) unlock(&sched.lock) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index a80a34a18e..042c3137cd 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1225,7 +1225,9 @@ var isIdleInSynctest = [len(waitReasonStrings)]bool{ } var ( - allm *m + // Linked-list of all Ms. Written under sched.lock, read atomically. + allm *m + gomaxprocs int32 numCPUStartup int32 forcegc forcegcstate -- 2.51.0