From 745349712e837ef77eb7b5a21c4d4e5c7ca0371a Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 4 Dec 2025 23:27:03 +0000 Subject: [PATCH] runtime: don't count nGsyscallNoP for extra Ms in C In #76435, it turns out that the new metric /sched/goroutines/not-in-go:goroutines counts C threads that have called into Go before (on Linux) as not-in-go goroutines. The reason for this is that the M is still attached to the C thread on Linux as an optimization, so we don't go through all the trouble of detaching the M and, of course, decrementing nGsyscallNoP. There's an easy fix to this accounting issue. The flag on the M, isExtraInC, says whether a thread with an extra M attached no longer has any Go on its (logical) stack. When we take the P from an M in this state, we simply just don't increment nGsyscallNoP. When it calls back into Go, we similarly skip the decrement to nGsyscallNoP. This is more efficient than alternatives, like always updating nGsyscallNoP in cgocallbackg, since that would add a new read-modify-write atomic onto that fast path. It does mean we count threads in C with a P still attached as not-in-go, but this transient in most real programs, assuming the thread indeed does not call back into Go any time soon. Fixes #76435. Change-Id: Id05563bacbe35d3fae17d67fb5ed45fa43fa0548 Reviewed-on: https://go-review.googlesource.com/c/go/+/726964 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt --- src/runtime/metrics_test.go | 15 +++ src/runtime/proc.go | 54 ++++++++-- src/runtime/runtime2.go | 2 +- src/runtime/testdata/testprogcgo/notingo.go | 108 ++++++++++++++++++++ 4 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 src/runtime/testdata/testprogcgo/notingo.go diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 92cec75465..efd2c2adb9 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -1584,3 +1584,18 @@ func TestReadMetricsSched(t *testing.T) { t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want) } } + +func TestNotInGoMetricCallback(t *testing.T) { + switch runtime.GOOS { + case "windows", "plan9": + t.Skip("unsupported on Windows and Plan9") + } + + // This test is run in a subprocess to prevent other tests from polluting the metrics + // and because we need to make some cgo callbacks. + output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback") + want := "OK\n" + if output != want { + t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want) + } +} diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 16538098cf..52def488ff 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2433,7 +2433,7 @@ func needm(signal bool) { sp := sys.GetCallerSP() callbackUpdateSystemStack(mp, sp, signal) - // Should mark we are already in Go now. + // We must mark that we are already in Go now. // Otherwise, we may call needm again when we get a signal, before cgocallbackg1, // which means the extram list may be empty, that will cause a deadlock. mp.isExtraInC = false @@ -2455,7 +2455,8 @@ func needm(signal bool) { // mp.curg is now a real goroutine. casgstatus(mp.curg, _Gdeadextra, _Gsyscall) sched.ngsys.Add(-1) - sched.nGsyscallNoP.Add(1) + // N.B. We do not update nGsyscallNoP, because isExtraInC threads are not + // counted as real goroutines while they're in C. if !signal { if trace.ok() { @@ -2590,7 +2591,7 @@ func dropm() { casgstatus(mp.curg, _Gsyscall, _Gdeadextra) mp.curg.preemptStop = false sched.ngsys.Add(1) - sched.nGsyscallNoP.Add(-1) + decGSyscallNoP(mp) if !mp.isExtraInSig { if trace.ok() { @@ -4732,7 +4733,7 @@ func entersyscallHandleGCWait(trace traceLocker) { if trace.ok() { trace.ProcStop(pp) } - sched.nGsyscallNoP.Add(1) + addGSyscallNoP(gp.m) // We gave up our P voluntarily. pp.gcStopTime = nanotime() pp.syscalltick++ if sched.stopwait--; sched.stopwait == 0 { @@ -4763,7 +4764,7 @@ func entersyscallblock() { gp.m.syscalltick = gp.m.p.ptr().syscalltick gp.m.p.ptr().syscalltick++ - sched.nGsyscallNoP.Add(1) + addGSyscallNoP(gp.m) // We're going to give up our P. // Leave SP around for GC and traceback. pc := sys.GetCallerPC() @@ -5001,8 +5002,8 @@ func exitsyscallTryGetP(oldp *p) *p { if oldp != nil { if thread, ok := setBlockOnExitSyscall(oldp); ok { thread.takeP() + addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change. thread.resume() - sched.nGsyscallNoP.Add(-1) // takeP adds 1. return oldp } } @@ -5017,7 +5018,7 @@ func exitsyscallTryGetP(oldp *p) *p { } unlock(&sched.lock) if pp != nil { - sched.nGsyscallNoP.Add(-1) + decGSyscallNoP(getg().m) // We got a P for ourselves. return pp } } @@ -5043,7 +5044,7 @@ func exitsyscallNoP(gp *g) { trace.GoSysExit(true) traceRelease(trace) } - sched.nGsyscallNoP.Add(-1) + decGSyscallNoP(getg().m) dropg() lock(&sched.lock) var pp *p @@ -5081,6 +5082,41 @@ func exitsyscallNoP(gp *g) { schedule() // Never returns. } +// addGSyscallNoP must be called when a goroutine in a syscall loses its P. +// This function updates all relevant accounting. +// +// nosplit because it's called on the syscall paths. +// +//go:nosplit +func addGSyscallNoP(mp *m) { + // It's safe to read isExtraInC here because it's only mutated + // outside of _Gsyscall, and we know this thread is attached + // to a goroutine in _Gsyscall and blocked from exiting. + if !mp.isExtraInC { + // Increment nGsyscallNoP since we're taking away a P + // from a _Gsyscall goroutine, but only if isExtraInC + // is not set on the M. If it is, then this thread is + // back to being a full C thread, and will just inflate + // the count of not-in-go goroutines. See go.dev/issue/76435. + sched.nGsyscallNoP.Add(1) + } +} + +// decGSsyscallNoP must be called whenever a goroutine in a syscall without +// a P exits the system call. This function updates all relevant accounting. +// +// nosplit because it's called from dropm. +// +//go:nosplit +func decGSyscallNoP(mp *m) { + // Update nGsyscallNoP, but only if this is not a thread coming + // out of C. See the comment in addGSyscallNoP. This logic must match, + // to avoid unmatched increments and decrements. + if !mp.isExtraInC { + sched.nGsyscallNoP.Add(-1) + } +} + // Called from syscall package before fork. // // syscall_runtime_BeforeFork is for package syscall, @@ -6758,7 +6794,7 @@ func (s syscallingThread) releaseP(state uint32) { trace.ProcSteal(s.pp) traceRelease(trace) } - sched.nGsyscallNoP.Add(1) + addGSyscallNoP(s.mp) s.pp.syscalltick++ } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index cd75e2dd7c..fde378ff25 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -945,7 +945,7 @@ type schedt struct { nmfreed int64 // cumulative number of freed m's ngsys atomic.Int32 // number of system goroutines - nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P + nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P but whose M is not isExtraInC pidle puintptr // idle p's npidle atomic.Int32 diff --git a/src/runtime/testdata/testprogcgo/notingo.go b/src/runtime/testdata/testprogcgo/notingo.go new file mode 100644 index 0000000000..e5b1062e9e --- /dev/null +++ b/src/runtime/testdata/testprogcgo/notingo.go @@ -0,0 +1,108 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !plan9 && !windows +// +build !plan9,!windows + +package main + +/* +#include +#include +#include + +extern void Ready(); + +static int spinning; +static int released; + +static void* enterGoThenSpinTwice(void* arg __attribute__ ((unused))) { + Ready(); + atomic_fetch_add(&spinning, 1); + while(atomic_load(&released) == 0) {}; + + Ready(); + atomic_fetch_add(&spinning, 1); + while(1) {}; + return NULL; +} + +static void SpinTwiceInNewCThread() { + pthread_t tid; + pthread_create(&tid, NULL, enterGoThenSpinTwice, NULL); +} + +static int Spinning() { + return atomic_load(&spinning); +} + +static void Release() { + atomic_store(&spinning, 0); + atomic_store(&released, 1); +} +*/ +import "C" + +import ( + "os" + "runtime" + "runtime/metrics" +) + +func init() { + register("NotInGoMetricCallback", NotInGoMetricCallback) +} + +func NotInGoMetricCallback() { + const N = 10 + s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}} + + // Create N new C threads that have called into Go at least once. + for range N { + C.SpinTwiceInNewCThread() + } + + // Synchronize with spinning threads twice. + // + // This helps catch bad accounting by taking at least a couple other + // codepaths which would cause the accounting to change. + for i := range 2 { + // Make sure they pass through Go. + // N.B. Ready is called twice by the new threads. + for j := range N { + <-readyCh + if j == 2 { + // Try to trigger an update in the immediate STW handoff case. + runtime.ReadMemStats(&m) + } + } + + // Make sure they're back in C. + for C.Spinning() < N { + } + + // Do something that stops the world to take all the Ps back. + runtime.ReadMemStats(&m) + + if i == 0 { + C.Release() + } + } + + // Read not-in-go. + metrics.Read(s) + if n := s[0].Value.Uint64(); n != 0 { + println("expected 0 not-in-go goroutines, found", n) + os.Exit(2) + } + println("OK") +} + +var m runtime.MemStats +var readyCh = make(chan bool) + +//export Ready +func Ready() { + readyCh <- true +} -- 2.52.0