runtime: move TestReadMetricsSched to testprog
author Michael Anthony Knyszek <mknyszek@google.com>
Fri, 26 Sep 2025 17:05:43 +0000 (17:05 +0000)
committer Gopher Robot <gobot@golang.org>
Fri, 26 Sep 2025 17:55:30 +0000 (10:55 -0700)
There are just too many flakes resulting from background pollution by
the testing package and other tests. Run the test in a subprocess,
where at least the environment can be more tightly controlled.

Fixes #75049.

Change-Id: Iad59edaaf31268f1fcb77273f01317d963708fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/707155
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
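
The test now drives a registered entry point in the testprog helper
binary. For context, testprog entry points register themselves by name
and the harness dispatches on its first argument; the sketch below is
an assumption inferred from the register call in the new file, not code
from this CL (the real harness lives in
src/runtime/testdata/testprog/main.go):

    // Assumed shape of the testprog dispatch harness; see
    // src/runtime/testdata/testprog/main.go for the real thing.
    package main

    import (
        "fmt"
        "os"
    )

    // cmds maps entry-point names to their implementations.
    var cmds = map[string]func(){}

    // register makes an entry point invocable by name; files like
    // schedmetrics.go call it from init().
    func register(name string, f func()) {
        if cmds[name] != nil {
            panic("duplicate registration: " + name)
        }
        cmds[name] = f
    }

    func main() {
        if len(os.Args) < 2 {
            fmt.Fprintln(os.Stderr, "usage: testprog <name>")
            os.Exit(2)
        }
        f, ok := cmds[os.Args[1]]
        if !ok {
            fmt.Fprintf(os.Stderr, "unknown function: %s\n", os.Args[1])
            os.Exit(2)
        }
        f() // e.g. SchedMetrics, which prints "OK" on success
    }

Printing a bare "OK" on success keeps the wrapper test down to a single
string comparison against the subprocess's combined output.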

src/runtime/metrics_test.go
src/runtime/testdata/testprog/pipe_unix.go [moved from src/runtime/pipe_unix_test.go with 93% similarity]
src/runtime/testdata/testprog/pipe_windows.go [moved from src/runtime/pipe_windows_test.go with 93% similarity]
src/runtime/testdata/testprog/schedmetrics.go [new file with mode: 0644]

index af042f44456ba4e4571883643367a98127bc1f24..b67424301b42fafa3518747f66a41fa561436714 100644 (file)
@@ -22,7 +22,6 @@ import (
        "strings"
        "sync"
        "sync/atomic"
-       "syscall"
        "testing"
        "time"
        "unsafe"
@@ -1578,211 +1577,10 @@ func TestReadMetricsFinalizers(t *testing.T) {
 }
 
 func TestReadMetricsSched(t *testing.T) {
-       const (
-               notInGo = iota
-               runnable
-               running
-               waiting
-               created
-               threads
-               numSamples
-       )
-       var s [numSamples]metrics.Sample
-       s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
-       s[runnable].Name = "/sched/goroutines/runnable:goroutines"
-       s[running].Name = "/sched/goroutines/running:goroutines"
-       s[waiting].Name = "/sched/goroutines/waiting:goroutines"
-       s[created].Name = "/sched/goroutines-created:goroutines"
-       s[threads].Name = "/sched/threads/total:threads"
-
-       logMetrics := func(t *testing.T, s []metrics.Sample) {
-               for i := range s {
-                       t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
-               }
-       }
-
-       // generalSlack is the amount of goroutines we allow ourselves to be
-       // off by in any given category, either due to background system
-       // goroutines or testing package goroutines.
-       const generalSlack = 4
-
-       // waitingSlack is the max number of blocked goroutines left
-       // from other tests, the testing package, or system
-       // goroutines.
-       const waitingSlack = 100
-
-       // threadsSlack is the maximum number of threads left over
-       // from other tests and the runtime (sysmon, the template thread, etc.)
-       const threadsSlack = 20
-
-       // Make sure GC isn't running, since GC workers interfere with
-       // expected counts.
-       defer debug.SetGCPercent(debug.SetGCPercent(-1))
-       runtime.GC()
-
-       check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
-               val := s.Value.Uint64()
-               if val < min {
-                       t.Errorf("%s too low; %d < %d", s.Name, val, min)
-               }
-               if val > max {
-                       t.Errorf("%s too high; %d > %d", s.Name, val, max)
-               }
-       }
-       checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
-               check(t, s, value, value)
+       // This test is run in a subprocess to prevent other tests from polluting the metrics.
+       output := runTestProg(t, "testprog", "SchedMetrics")
+       want := "OK\n"
+       if output != want {
+               t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
        }
-       spinUntil := func(f func() bool) bool {
-               for {
-                       if f() {
-                               return true
-                       }
-                       time.Sleep(50 * time.Millisecond)
-               }
-       }
-
-       // Check base values.
-       t.Run("base", func(t *testing.T) {
-               defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-               metrics.Read(s[:])
-               logMetrics(t, s[:])
-               check(t, &s[notInGo], 0, generalSlack)
-               check(t, &s[runnable], 0, generalSlack)
-               checkEq(t, &s[running], 1)
-               check(t, &s[waiting], 0, waitingSlack)
-       })
-
-       metrics.Read(s[:])
-       createdAfterBase := s[created].Value.Uint64()
-
-       // Force Running count to be high. We'll use these goroutines
-       // for Runnable, too.
-       const count = 10
-       var ready, exit atomic.Uint32
-       for i := 0; i < count-1; i++ {
-               go func() {
-                       ready.Add(1)
-                       for exit.Load() == 0 {
-                               // Spin to get us and keep us running, but check
-                               // the exit condition so we exit out early if we're
-                               // done.
-                               start := time.Now()
-                               for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
-                               }
-                               runtime.Gosched()
-                       }
-               }()
-       }
-       for ready.Load() < count-1 {
-               runtime.Gosched()
-       }
-
-       // Be careful. We've entered a dangerous state for platforms
-       // that do not return back to the underlying system unless all
-       // goroutines are blocked, like js/wasm, since we have a bunch
-       // of runnable goroutines all spinning. We cannot write anything
-       // out.
-       if testenv.HasParallelism() {
-               t.Run("created", func(t *testing.T) {
-                       metrics.Read(s[:])
-                       logMetrics(t, s[:])
-                       checkEq(t, &s[created], createdAfterBase+count)
-               })
-               t.Run("running", func(t *testing.T) {
-                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
-                       // It can take a little bit for the scheduler to
-                       // distribute the goroutines to Ps, so retry until
-                       // we see the count we expect or the test times out.
-                       spinUntil(func() bool {
-                               metrics.Read(s[:])
-                               return s[running].Value.Uint64() >= count
-                       })
-                       logMetrics(t, s[:])
-                       check(t, &s[running], count, count+4)
-                       check(t, &s[threads], count, count+4+threadsSlack)
-               })
-
-               // Force runnable count to be high.
-               t.Run("runnable", func(t *testing.T) {
-                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-                       metrics.Read(s[:])
-                       logMetrics(t, s[:])
-                       checkEq(t, &s[running], 1)
-                       check(t, &s[runnable], count-1, count+generalSlack)
-               })
-
-               // Done with the running/runnable goroutines.
-               exit.Store(1)
-       } else {
-               // Read metrics and then exit all the other goroutines,
-               // so that system calls may proceed.
-               metrics.Read(s[:])
-
-               // Done with the running/runnable goroutines.
-               exit.Store(1)
-
-               // Now we can check our invariants.
-               t.Run("created", func(t *testing.T) {
-                       // Look for count-1 goroutines because we read metrics
-                       // *before* t.Run goroutine was created for this sub-test.
-                       checkEq(t, &s[created], createdAfterBase+count-1)
-               })
-               t.Run("running", func(t *testing.T) {
-                       logMetrics(t, s[:])
-                       checkEq(t, &s[running], 1)
-                       checkEq(t, &s[threads], 1)
-               })
-               t.Run("runnable", func(t *testing.T) {
-                       logMetrics(t, s[:])
-                       check(t, &s[runnable], count-1, count+generalSlack)
-               })
-       }
-
-       // Force not-in-go count to be high. This is a little tricky since
-       // we try really hard not to let things block in system calls.
-       // We have to drop to the syscall package to do this reliably.
-       t.Run("not-in-go", func(t *testing.T) {
-               // Block a bunch of goroutines on an OS pipe.
-               pr, pw, err := pipe()
-               if err != nil {
-                       switch runtime.GOOS {
-                       case "js", "wasip1":
-                               t.Skip("creating pipe:", err)
-                       }
-                       t.Fatal("creating pipe:", err)
-               }
-               for i := 0; i < count; i++ {
-                       go syscall.Read(pr, make([]byte, 1))
-               }
-
-               // Let the goroutines block.
-               spinUntil(func() bool {
-                       metrics.Read(s[:])
-                       return s[notInGo].Value.Uint64() >= count
-               })
-               logMetrics(t, s[:])
-               check(t, &s[notInGo], count, count+generalSlack)
-
-               syscall.Close(pw)
-               syscall.Close(pr)
-       })
-
-       t.Run("waiting", func(t *testing.T) {
-               // Force waiting count to be high.
-               const waitingCount = 1000
-               stop := make(chan bool)
-               for i := 0; i < waitingCount; i++ {
-                       go func() { <-stop }()
-               }
-
-               // Let the goroutines block.
-               spinUntil(func() bool {
-                       metrics.Read(s[:])
-                       return s[waiting].Value.Uint64() >= waitingCount
-               })
-               logMetrics(t, s[:])
-               check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
-
-               close(stop)
-       })
 }
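
Note: runTestProg is the runtime test suite's existing subprocess
helper (it lives with the crash tests in crash_test.go). It builds the
testprog binary once per test run, runs it with the entry-point name as
its argument, and returns the combined output, which is why the wrapper
above compares against exactly "OK\n".
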
similarity index 93%
rename from src/runtime/pipe_unix_test.go
rename to src/runtime/testdata/testprog/pipe_unix.go
index 82a49df3399bbe64e27724959ebae5767bd78c3c..cee4da65f6e6f36c5bfd0ec0b44480c600ea7253 100644 (file)
@@ -4,7 +4,7 @@
 
 //go:build !windows
 
-package runtime_test
+package main
 
 import "syscall"
 
similarity index 93%
rename from src/runtime/pipe_windows_test.go
rename to src/runtime/testdata/testprog/pipe_windows.go
index ad84ec918ae67ea9ba1ac8810cfcaadb3196d581..597601a1790d242a8ebc4580091b0d293c0bb323 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-package runtime_test
+package main
 
 import "syscall"
 
diff --git a/src/runtime/testdata/testprog/schedmetrics.go b/src/runtime/testdata/testprog/schedmetrics.go
new file mode 100644 (file)
index 0000000..6d3f68a
--- /dev/null
+++ b/src/runtime/testdata/testprog/schedmetrics.go
@@ -0,0 +1,267 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+       "bytes"
+       "fmt"
+       "internal/testenv"
+       "log"
+       "os"
+       "runtime"
+       "runtime/debug"
+       "runtime/metrics"
+       "strings"
+       "sync/atomic"
+       "syscall"
+       "time"
+)
+
+func init() {
+       register("SchedMetrics", SchedMetrics)
+}
+
+// SchedMetrics tests runtime/metrics.Read for various scheduler metrics.
+//
+// Implemented in testprog to prevent other tests from polluting
+// the metrics.
+func SchedMetrics() {
+       const (
+               notInGo = iota
+               runnable
+               running
+               waiting
+               created
+               threads
+               numSamples
+       )
+       var s [numSamples]metrics.Sample
+       s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
+       s[runnable].Name = "/sched/goroutines/runnable:goroutines"
+       s[running].Name = "/sched/goroutines/running:goroutines"
+       s[waiting].Name = "/sched/goroutines/waiting:goroutines"
+       s[created].Name = "/sched/goroutines-created:goroutines"
+       s[threads].Name = "/sched/threads/total:threads"
+
+       var failed bool
+       var out bytes.Buffer
+       logger := log.New(&out, "", 0)
+       indent := 0
+       logf := func(format string, a ...any) {
+               var prefix strings.Builder
+               for range indent {
+                       prefix.WriteString("\t")
+               }
+               logger.Printf(prefix.String()+format, a...)
+       }
+       errorf := func(format string, a ...any) {
+               logf(format, a...)
+               failed = true
+       }
+       run := func(name string, f func()) {
+               logf("=== Checking %q", name)
+               indent++
+               f()
+               indent--
+       }
+       logMetrics := func(s []metrics.Sample) {
+               for i := range s {
+                       logf("%s: %d", s[i].Name, s[i].Value.Uint64())
+               }
+       }
+
+       // generalSlack is the number of goroutines we allow ourselves to be
+       // off by in any given category due to background system goroutines.
+       // This excludes GC goroutines.
+       generalSlack := uint64(4)
+
+       // waitingSlack is the max number of blocked goroutines controlled
+       // by the runtime that we'll allow for. This includes GC goroutines
+       // as well as finalizer and cleanup goroutines.
+       waitingSlack := generalSlack + uint64(2*runtime.GOMAXPROCS(-1))
+
+       // threadsSlack is the maximum number of threads left over
+       // from the runtime (sysmon, the template thread, etc.)
+       const threadsSlack = 4
+
+       // Make sure GC isn't running, since GC workers interfere with
+       // expected counts.
+       defer debug.SetGCPercent(debug.SetGCPercent(-1))
+       runtime.GC()
+
+       check := func(s *metrics.Sample, min, max uint64) {
+               val := s.Value.Uint64()
+               if val < min {
+                       errorf("%s too low; %d < %d", s.Name, val, min)
+               }
+               if val > max {
+                       errorf("%s too high; %d > %d", s.Name, val, max)
+               }
+       }
+       checkEq := func(s *metrics.Sample, value uint64) {
+               check(s, value, value)
+       }
+       spinUntil := func(f func() bool) bool {
+               for {
+                       if f() {
+                               return true
+                       }
+                       time.Sleep(50 * time.Millisecond)
+               }
+       }
+
+       // Check base values.
+       run("base", func() {
+               defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+               metrics.Read(s[:])
+               logMetrics(s[:])
+               check(&s[notInGo], 0, generalSlack)
+               check(&s[runnable], 0, generalSlack)
+               checkEq(&s[running], 1)
+               check(&s[waiting], 0, waitingSlack)
+       })
+
+       metrics.Read(s[:])
+       createdAfterBase := s[created].Value.Uint64()
+
+       // Force Running count to be high. We'll use these goroutines
+       // for Runnable, too.
+       const count = 10
+       var ready, exit atomic.Uint32
+       for range count {
+               go func() {
+                       ready.Add(1)
+                       for exit.Load() == 0 {
+                               // Spin to get us and keep us running, but check
+       // the exit condition so we exit early if we're
+                               // done.
+                               start := time.Now()
+                               for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
+                               }
+                               runtime.Gosched()
+                       }
+               }()
+       }
+       for ready.Load() < count {
+               runtime.Gosched()
+       }
+
+       // Be careful. We've entered a dangerous state for platforms
+       // that do not return to the underlying system unless all
+       // goroutines are blocked, like js/wasm, since we have a bunch
+       // of runnable goroutines all spinning. We cannot write anything
+       // out.
+       if testenv.HasParallelism() {
+               run("created", func() {
+                       metrics.Read(s[:])
+                       logMetrics(s[:])
+                       checkEq(&s[created], createdAfterBase+count)
+               })
+               run("running", func() {
+                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
+                       // It can take a little bit for the scheduler to
+                       // distribute the goroutines to Ps, so retry until
+                       // we see the count we expect or the test times out.
+                       spinUntil(func() bool {
+                               metrics.Read(s[:])
+                               return s[running].Value.Uint64() >= count
+                       })
+                       logMetrics(s[:])
+                       check(&s[running], count, count+4)
+                       check(&s[threads], count, count+4+threadsSlack)
+               })
+
+               // Force runnable count to be high.
+               run("runnable", func() {
+                       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+                       metrics.Read(s[:])
+                       logMetrics(s[:])
+                       checkEq(&s[running], 1)
+                       check(&s[runnable], count-1, count+generalSlack)
+               })
+
+               // Done with the running/runnable goroutines.
+               exit.Store(1)
+       } else {
+               // Read metrics and then exit all the other goroutines,
+               // so that system calls may proceed.
+               metrics.Read(s[:])
+
+               // Done with the running/runnable goroutines.
+               exit.Store(1)
+
+               // Now we can check our invariants.
+               run("created", func() {
+                       // Look for count-1 goroutines because we read metrics
+       // *before* the run goroutine was created for this sub-test.
+                       checkEq(&s[created], createdAfterBase+count-1)
+               })
+               run("running", func() {
+                       logMetrics(s[:])
+                       checkEq(&s[running], 1)
+                       checkEq(&s[threads], 1)
+               })
+               run("runnable", func() {
+                       logMetrics(s[:])
+                       check(&s[runnable], count-1, count+generalSlack)
+               })
+       }
+
+       // Force not-in-go count to be high. This is a little tricky since
+       // we try really hard not to let things block in system calls.
+       // We have to drop to the syscall package to do this reliably.
+       run("not-in-go", func() {
+               // Block a bunch of goroutines on an OS pipe.
+               pr, pw, err := pipe()
+               if err != nil {
+                       switch runtime.GOOS {
+                       case "js", "wasip1":
+                               logf("creating pipe: %v", err)
+                               return
+                       }
+                       panic(fmt.Sprintf("creating pipe: %v", err))
+               }
+               for i := 0; i < count; i++ {
+                       go syscall.Read(pr, make([]byte, 1))
+               }
+
+               // Let the goroutines block.
+               spinUntil(func() bool {
+                       metrics.Read(s[:])
+                       return s[notInGo].Value.Uint64() >= count
+               })
+               logMetrics(s[:])
+               check(&s[notInGo], count, count+generalSlack)
+
+               syscall.Close(pw)
+               syscall.Close(pr)
+       })
+
+       run("waiting", func() {
+               // Force waiting count to be high.
+               const waitingCount = 1000
+               stop := make(chan bool)
+               for i := 0; i < waitingCount; i++ {
+                       go func() { <-stop }()
+               }
+
+               // Let the goroutines block.
+               spinUntil(func() bool {
+                       metrics.Read(s[:])
+                       return s[waiting].Value.Uint64() >= waitingCount
+               })
+               logMetrics(s[:])
+               check(&s[waiting], waitingCount, waitingCount+waitingSlack)
+
+               close(stop)
+       })
+
+       if failed {
+               fmt.Fprintln(os.Stderr, out.String())
+               os.Exit(1)
+       } else {
+               fmt.Fprintln(os.Stderr, "OK")
+       }
+}
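
With the move, a clean run looks like: go test -run=TestReadMetricsSched
runtime builds the testprog binary and execs it with SchedMetrics as its
first argument, so only the subprocess's own goroutines count against
the slack values above. On failure the buffered log is dumped to stderr
and the wrapper test reports the mismatch against "OK\n".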