Cypherpunks repositories - gostls13.git/commitdiff
runtime: track goroutine location until actual STW
author: Michael Pratt <mpratt@google.com>
Thu, 20 Nov 2025 21:21:09 +0000 (16:21 -0500)
committer: Gopher Robot <gobot@golang.org>
Thu, 20 Nov 2025 23:01:20 +0000 (15:01 -0800)
TestTraceSTW / TestTraceGCSTW currently track the location (M/P) of the
target goroutines until the "start" log message is reached, assuming the
actual STW comes immediately afterwards.

On 386 with TestTraceGCSTW, it actually tends to take >10ms after the
start log before the STW actually occurs. This is enough time for sysmon
to preempt the target goroutines and migrate them to another location.

Fix this by continuing tracking all the way until the STW itself occurs.
We still keep the start log message so we can ignore any STW that
occurs before the point where we expect one.

Cq-Include-Trybots: luci.golang.try:gotip-linux-386-longtest,gotip-linux-amd64-longtest
Change-Id: I6a6a636cf2dcb18d8b33ac4ad88333cabff2eabb
Reviewed-on: https://go-review.googlesource.com/c/go/+/722520
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/runtime/proc_test.go

index 35a1aeab1f660a6bf8b425c845c8fd149bc50086..cff8775cc974fa03c3450b916417b7edc8094b13 100644 (file)
@@ -1321,11 +1321,13 @@ func runTestTracesSTW(t *testing.T, run int, name, stwType string) (err error) {
        //
        // 2. Once found, track which M and P the target goroutines run on until...
        //
-       // 3. Look for the "TraceSTW" "start" log message, where we commit the
-       // target goroutines' "before" M and P.
+       // 3. Look for the first STW after the "TraceSTW" "start" log message,
+       // where we commit the target goroutines' "before" M and P.
        //
        // N.B. We must do (1) and (2) together because the first target
        // goroutine may start running before the second is created.
+       var startLogSeen bool
+       var stwSeen bool
 findStart:
        for {
                ev, err := br.ReadEvent()
@@ -1384,10 +1386,26 @@ findStart:
 
                        // Found start point, move on to next stage.
                        t.Logf("Found start message")
-                       break findStart
+                       startLogSeen = true
+               case trace.EventRangeBegin:
+                       if !startLogSeen {
+                               // Ignore spurious STW before we expect.
+                               continue
+                       }
+
+                       r := ev.Range()
+                       if r.Name == stwType {
+                               t.Logf("Found STW")
+                               stwSeen = true
+                               break findStart
+                       }
                }
        }
 
+       if !stwSeen {
+               t.Fatal("Can't find STW in the test trace")
+       }
+
        t.Log("Target goroutines:")
        for _, gs := range targetGoroutines {
                t.Logf("%+v", gs)
@@ -1440,7 +1458,6 @@ findStart:
        // [1] This is slightly fragile because there is a small window between
        // the "start" log and actual STW during which the target goroutines
        // could legitimately migrate.
-       var stwSeen bool
        var pRunning []trace.ProcID
        var gRunning []trace.GoID
 findEnd:
@@ -1543,21 +1560,9 @@ findEnd:
                        // Found end point.
                        t.Logf("Found end message")
                        break findEnd
-               case trace.EventRangeBegin:
-                       r := ev.Range()
-                       if r.Name == stwType {
-                               // Note when we see the STW begin. This is not
-                               // load bearing; it's purpose is simply to fail
-                               // the test if we accidentally remove the STW.
-                               stwSeen = true
-                       }
                }
        }
 
-       if !stwSeen {
-               t.Fatal("No STW in the test trace")
-       }
-
        return nil
 }