runtime: don't count nGsyscallNoP for extra Ms in C

author Michael Anthony Knyszek <mknyszek@google.com>

Thu, 4 Dec 2025 23:27:03 +0000 (23:27 +0000)

committer Michael Knyszek <mknyszek@google.com>

Fri, 5 Dec 2025 21:48:24 +0000 (13:48 -0800)
author Michael Anthony Knyszek <mknyszek@google.com>
Thu, 4 Dec 2025 23:27:03 +0000 (23:27 +0000)
committer Michael Knyszek <mknyszek@google.com>
Fri, 5 Dec 2025 21:48:24 +0000 (13:48 -0800)
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go

index 92cec75465ce4cc2c93ccfaf699c092e05982026..efd2c2adb9bd851407e148c8fc2d0f5c794b6ef7 100644 (file)
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -1584,3 +1584,18 @@ func TestReadMetricsSched(t *testing.T) {
                 t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
         }
  }
+
+func TestNotInGoMetricCallback(t *testing.T) {
+       switch runtime.GOOS {
+       case "windows", "plan9":
+               t.Skip("unsupported on Windows and Plan9")
+       }
+
+       // This test is run in a subprocess to prevent other tests from polluting the metrics
+       // and because we need to make some cgo callbacks.
+       output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback")
+       want := "OK\n"
+       if output != want {
+               t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+       }
+}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index 16538098cf6c152627d8936cdf15a635f9de24fd..52def488ffca42f8a9cf3547020a709c551050f3 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2433,7 +2433,7 @@ func needm(signal bool) {
         sp := sys.GetCallerSP()
         callbackUpdateSystemStack(mp, sp, signal)
  
-       // Should mark we are already in Go now.
+       // We must mark that we are already in Go now.
         // Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
         // which means the extram list may be empty, that will cause a deadlock.
         mp.isExtraInC = false
@@ -2455,7 +2455,8 @@ func needm(signal bool) {
         // mp.curg is now a real goroutine.
         casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
         sched.ngsys.Add(-1)
-       sched.nGsyscallNoP.Add(1)
+       // N.B. We do not update nGsyscallNoP, because isExtraInC threads are not
+       // counted as real goroutines while they're in C.
  
         if !signal {
                 if trace.ok() {
@@ -2590,7 +2591,7 @@ func dropm() {
         casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
         mp.curg.preemptStop = false
         sched.ngsys.Add(1)
-       sched.nGsyscallNoP.Add(-1)
+       decGSyscallNoP(mp)
  
         if !mp.isExtraInSig {
                 if trace.ok() {
@@ -4732,7 +4733,7 @@ func entersyscallHandleGCWait(trace traceLocker) {
                 if trace.ok() {
                         trace.ProcStop(pp)
                 }
-               sched.nGsyscallNoP.Add(1)
+               addGSyscallNoP(gp.m) // We gave up our P voluntarily.
                 pp.gcStopTime = nanotime()
                 pp.syscalltick++
                 if sched.stopwait--; sched.stopwait == 0 {
@@ -4763,7 +4764,7 @@ func entersyscallblock() {
         gp.m.syscalltick = gp.m.p.ptr().syscalltick
         gp.m.p.ptr().syscalltick++
  
-       sched.nGsyscallNoP.Add(1)
+       addGSyscallNoP(gp.m) // We're going to give up our P.
  
         // Leave SP around for GC and traceback.
         pc := sys.GetCallerPC()
@@ -5001,8 +5002,8 @@ func exitsyscallTryGetP(oldp *p) *p {
         if oldp != nil {
                 if thread, ok := setBlockOnExitSyscall(oldp); ok {
                         thread.takeP()
+                       addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change.
                         thread.resume()
-                       sched.nGsyscallNoP.Add(-1) // takeP adds 1.
                         return oldp
                 }
         }
@@ -5017,7 +5018,7 @@ func exitsyscallTryGetP(oldp *p) *p {
                 }
                 unlock(&sched.lock)
                 if pp != nil {
-                       sched.nGsyscallNoP.Add(-1)
+                       decGSyscallNoP(getg().m) // We got a P for ourselves.
                         return pp
                 }
         }
@@ -5043,7 +5044,7 @@ func exitsyscallNoP(gp *g) {
                 trace.GoSysExit(true)
                 traceRelease(trace)
         }
-       sched.nGsyscallNoP.Add(-1)
+       decGSyscallNoP(getg().m)
         dropg()
         lock(&sched.lock)
         var pp *p
@@ -5081,6 +5082,41 @@ func exitsyscallNoP(gp *g) {
         schedule() // Never returns.
  }
  
+// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
+// This function updates all relevant accounting.
+//
+// nosplit because it's called on the syscall paths.
+//
+//go:nosplit
+func addGSyscallNoP(mp *m) {
+       // It's safe to read isExtraInC here because it's only mutated
+       // outside of _Gsyscall, and we know this thread is attached
+       // to a goroutine in _Gsyscall and blocked from exiting.
+       if !mp.isExtraInC {
+               // Increment nGsyscallNoP since we're taking away a P
+               // from a _Gsyscall goroutine, but only if isExtraInC
+               // is not set on the M. If it is, then this thread is
+               // back to being a full C thread, and will just inflate
+               // the count of not-in-go goroutines. See go.dev/issue/76435.
+               sched.nGsyscallNoP.Add(1)
+       }
+}
+
+// decGSsyscallNoP must be called whenever a goroutine in a syscall without
+// a P exits the system call. This function updates all relevant accounting.
+//
+// nosplit because it's called from dropm.
+//
+//go:nosplit
+func decGSyscallNoP(mp *m) {
+       // Update nGsyscallNoP, but only if this is not a thread coming
+       // out of C. See the comment in addGSyscallNoP. This logic must match,
+       // to avoid unmatched increments and decrements.
+       if !mp.isExtraInC {
+               sched.nGsyscallNoP.Add(-1)
+       }
+}
+
  // Called from syscall package before fork.
  //
  // syscall_runtime_BeforeFork is for package syscall,
@@ -6758,7 +6794,7 @@ func (s syscallingThread) releaseP(state uint32) {
                 trace.ProcSteal(s.pp)
                 traceRelease(trace)
         }
-       sched.nGsyscallNoP.Add(1)
+       addGSyscallNoP(s.mp)
         s.pp.syscalltick++
  }
  
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index cd75e2dd7c5a5e3ff428c6ccadcd1c9fcadb245d..fde378ff25ce540dfecd06510a225c93e3adb919 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -945,7 +945,7 @@ type schedt struct {
         nmfreed      int64          // cumulative number of freed m's
  
         ngsys        atomic.Int32 // number of system goroutines
-       nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P
+       nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P but whose M is not isExtraInC
  
         pidle        puintptr // idle p's
         npidle       atomic.Int32
diff --git a/src/runtime/testdata/testprogcgo/notingo.go b/src/runtime/testdata/testprogcgo/notingo.go

new file mode 100644 (file)

index 0000000..e5b1062
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/notingo.go
@@ -0,0 +1,108 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !plan9 && !windows
+// +build !plan9,!windows
+
+package main
+
+/*
+#include <stdatomic.h>
+#include <stddef.h>
+#include <pthread.h>
+
+extern void Ready();
+
+static int spinning;
+static int released;
+
+static void* enterGoThenSpinTwice(void* arg __attribute__ ((unused))) {
+       Ready();
+       atomic_fetch_add(&spinning, 1);
+       while(atomic_load(&released) == 0) {};
+
+       Ready();
+       atomic_fetch_add(&spinning, 1);
+       while(1) {};
+       return NULL;
+}
+
+static void SpinTwiceInNewCThread() {
+       pthread_t tid;
+       pthread_create(&tid, NULL, enterGoThenSpinTwice, NULL);
+}
+
+static int Spinning() {
+       return atomic_load(&spinning);
+}
+
+static void Release() {
+       atomic_store(&spinning, 0);
+       atomic_store(&released, 1);
+}
+*/
+import "C"
+
+import (
+       "os"
+       "runtime"
+       "runtime/metrics"
+)
+
+func init() {
+       register("NotInGoMetricCallback", NotInGoMetricCallback)
+}
+
+func NotInGoMetricCallback() {
+       const N = 10
+       s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
+
+       // Create N new C threads that have called into Go at least once.
+       for range N {
+               C.SpinTwiceInNewCThread()
+       }
+
+       // Synchronize with spinning threads twice.
+       //
+       // This helps catch bad accounting by taking at least a couple other
+       // codepaths which would cause the accounting to change.
+       for i := range 2 {
+               // Make sure they pass through Go.
+               // N.B. Ready is called twice by the new threads.
+               for j := range N {
+                       <-readyCh
+                       if j == 2 {
+                               // Try to trigger an update in the immediate STW handoff case.
+                               runtime.ReadMemStats(&m)
+                       }
+               }
+
+               // Make sure they're back in C.
+               for C.Spinning() < N {
+               }
+
+               // Do something that stops the world to take all the Ps back.
+               runtime.ReadMemStats(&m)
+
+               if i == 0 {
+                       C.Release()
+               }
+       }
+
+       // Read not-in-go.
+       metrics.Read(s)
+       if n := s[0].Value.Uint64(); n != 0 {
+               println("expected 0 not-in-go goroutines, found", n)
+               os.Exit(2)
+       }
+       println("OK")
+}
+
+var m runtime.MemStats
+var readyCh = make(chan bool)
+
+//export Ready
+func Ready() {
+       readyCh <- true
+}
author	Michael Anthony Knyszek <mknyszek@google.com>
	Thu, 4 Dec 2025 23:27:03 +0000 (23:27 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Fri, 5 Dec 2025 21:48:24 +0000 (13:48 -0800)
src/runtime/metrics_test.go		patch \| blob \| history
src/runtime/proc.go		patch \| blob \| history
src/runtime/runtime2.go		patch \| blob \| history
src/runtime/testdata/testprogcgo/notingo.go	[new file with mode: 0644]	patch \| blob