return metricGrowth, profileGrowth, p
}
- testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
- return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
- metricGrowth, profileGrowth, p := measureDelta(t, func() {
+ testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+ return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+ metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
var started, stopped sync.WaitGroup
started.Add(workers)
stopped.Add(workers)
stopped.Wait()
})
- if profileGrowth == 0 {
+ if totalProfileGrowth == 0 {
t.Errorf("no increase in mutex profile")
}
if metricGrowth == 0 && strictTiming {
// together. It doesn't work as well for user-space contention, where the
// involved goroutines are not _Grunnable the whole time and so need to pass
// through the scheduler.
- t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
+ t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
acceptStacks = append([][]string(nil), acceptStacks...)
}
var stks [][]string
- values := make([][2]int64, len(acceptStacks))
+ values := make([][2]int64, len(acceptStacks)+1)
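+ // The final element of values is a catch-all bucket: it accumulates
+ // samples whose stacks match none of acceptStacks.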
for _, s := range p.Sample {
var have []string
for _, loc := range s.Location {
for _, line := range loc.Line {
have = append(have, line.Function.Name)
}
}
stks = append(stks, have)
+ found := false
for i, stk := range acceptStacks {
if slices.Equal(have, stk) {
values[i][0] += s.Value[0]
values[i][1] += s.Value[1]
+ found = true
+ break
}
}
+ if !found {
+ values[len(values)-1][0] += s.Value[0]
+ values[len(values)-1][1] += s.Value[1]
+ }
}
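+ // Report the profile's growth per accepted stack, with the catch-all
+ // bucket's total in the final element.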
+ profileGrowth = make([]int64, len(acceptStacks)+1)
+ profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
for i, stk := range acceptStacks {
n += values[i][0]
value += values[i][1]
+ profileGrowth[i] = values[i][1]
t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
}
if n == 0 && value == 0 {
name := t.Name()
t.Run("runtime.lock", func(t *testing.T) {
- mus := make([]runtime.Mutex, 200)
- var needContention atomic.Int64
- delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
- delayMicros := delay.Microseconds()
-
// The goroutine that acquires the lock will only proceed when it
// detects that its partner is contended for the lock. That will lead to
// live-lock if anything (such as a STW) prevents the partner goroutine
// from running. Allowing the workers to stop and restart (so a STW could
// proceed) means the lock will end up contended twice. Instead, disable the GC.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
- const workers = 2
- if runtime.GOMAXPROCS(0) < workers {
- t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
+ mus := make([]runtime.Mutex, 200)
+ var needContention atomic.Int64
+
+ baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+ fastDelayMicros := baseDelay.Microseconds()
+ slowDelayMicros := baseDelay.Microseconds() * 4
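+ // The slow critical section is 4x the fast one, so the profile should
+ // blame the two roles for clearly different amounts of delay.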
+
+ const (
+ fastRole = iota
+ slowRole
+ workerCount
+ )
+ if runtime.GOMAXPROCS(0) < workerCount {
+ t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
}
+ minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
+ maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
+ for i := range minTicks {
+ minTicks[i] = make([]int64, len(mus))
+ maxTicks[i] = make([]int64, len(mus))
+ }
+ var id atomic.Int32
fn := func() bool {
n := int(needContention.Load())
if n < 0 {
return false
}
mu := &mus[n]
- runtime.Lock(mu)
- for int(needContention.Load()) == n {
- if runtime.MutexContended(mu) {
- // make them wait a little while
- for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
- runtime.Usleep(uint32(delayMicros))
+ // Each worker has a role: either the fast or the slow critical section.
+ // Rotate the role assignments as we step through the mutex slice so
+ // we don't end up with one M always claiming the same kind of work.
+ id := int(id.Add(1))
+ role := (id + n) % workerCount
+
+ marker, delayMicros := fastMarkerFrame, fastDelayMicros
+ if role == slowRole {
+ marker, delayMicros = slowMarkerFrame, slowDelayMicros
+ }
+
+ // Each lock is used by two different critical sections, one fast
+ // and one slow, identified in profiles by their different "marker"
+ // functions. We expect the profile to blame each for the amount of
+ // delay it inflicts on other users of the lock. We run one worker
+ // of each kind, so any contention in one would be due to the other.
+ //
+ // We measure how long our runtime.lock call takes, which sets an
+ // upper bound on how much blame to expect for the other worker type
+ // in the profile. And if we acquire the lock first, we wait for the
+ // other worker to announce its contention. We measure the
+ // known-contended time, to use as a lower bound on how much blame
+ // we expect of ourselves in the profile. Then we stall for a little
+ // while (different amounts for "fast" versus "slow") before
+ // unlocking the mutex.
+
+ marker(func() {
+ t0 := runtime.Cputicks()
+ runtime.Lock(mu)
+ maxTicks[role][n] = runtime.Cputicks() - t0
+ minTicks[role][n] = 0
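+ // The lower bound stays zero if we never observe the partner's
+ // contention before the countdown advances.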
+ for int(needContention.Load()) == n {
+ if runtime.MutexContended(mu) {
+ t1 := runtime.Cputicks()
+ // make them wait a little while
+ for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
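+ // Sleep in small increments so this loop's nanotime bound keeps the
+ // total delay close to delayMicros.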
+ runtime.Usleep(uint32(1 + delayMicros/8))
+ }
+ minTicks[role][n] = runtime.Cputicks() - t1
+ break
}
- break
}
- }
- runtime.Unlock(mu)
- needContention.Store(int64(n - 1))
+ runtime.Unlock(mu)
+ needContention.Store(int64(n - 1))
+ })
return true
}
- stks := [][]string{{
- "runtime.unlock",
- "runtime_test." + name + ".func5.1",
- "runtime_test.(*contentionWorker).run",
- }}
+ stks := make([][]string, 2)
+ for i := range stks {
+ marker := "runtime_test.fastMarkerFrame"
+ if i == slowRole {
+ marker = "runtime_test.slowMarkerFrame"
+ }
+
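+ // The closure passed to the marker appears as ".func5.1.1", nested
+ // inside the worker fn's ".func5.1", with the marker frame between them.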
+ stks[i] = []string{
+ "runtime.unlock",
+ "runtime_test." + name + ".func5.1.1",
+ marker,
+ "runtime_test." + name + ".func5.1",
+ "runtime_test.(*contentionWorker).run",
+ }
+ }
t.Run("sample-1", func(t *testing.T) {
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
needContention.Store(int64(len(mus) - 1))
- metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+ metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
defer func() {
if t.Failed() {
explain()
}
}()
// gTrackingPeriod, so we don't have a hard lower bound here.
testenv.SkipFlaky(t, 64253)
- if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+ if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
// The test imposes a delay with usleep, verified with calls to
// nanotime. Compare against the runtime/metrics package's view
// (based on nanotime) rather than runtime/pprof's view (based
// on cputicks).
t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
}
if have, want := n, int64(len(mus)); have != want {
t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
}
+ var slowMinTicks, fastMinTicks int64
+ for role, ticks := range minTicks {
+ for _, delta := range ticks {
+ if role == slowRole {
+ slowMinTicks += delta
+ } else {
+ fastMinTicks += delta
+ }
+ }
+ }
+ var slowMaxTicks, fastMaxTicks int64
+ for role, ticks := range maxTicks {
+ for _, delta := range ticks {
+ if role == slowRole {
+ slowMaxTicks += delta
+ } else {
+ fastMaxTicks += delta
+ }
+ }
+ }
+
+ cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
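+ // Dividing cputicks deltas by cpuGHz converts them to nanoseconds,
+ // matching the units of the profile's contention durations.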
+ for _, set := range []struct {
+ name string
+ profTime int64
+ minTime int64
+ maxTime int64
+ }{
+ {
+ name: "slow",
+ profTime: profileGrowth[slowRole],
+ minTime: int64(float64(slowMinTicks) / cpuGHz),
+ maxTime: int64(float64(fastMaxTicks) / cpuGHz),
+ },
+ {
+ name: "fast",
+ profTime: profileGrowth[fastRole],
+ minTime: int64(float64(fastMinTicks) / cpuGHz),
+ maxTime: int64(float64(slowMaxTicks) / cpuGHz),
+ },
+ } {
+ t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
+ t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
+ t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+ if set.profTime < set.minTime {
+ t.Errorf("profile undercounted %q critical section", set.name)
+ }
+ if set.profTime > set.maxTime {
+ t.Errorf("profile overcounted %q critical section", set.name)
+ }
+ }
+
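+ // Sum the per-stack growth to recover the total contention in seconds,
+ // for comparison against the runtime/metrics view.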
+ var totalProfileGrowth float64
+ for _, growth := range profileGrowth {
+ totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+ }
+
const slop = 1.5 // account for nanotime vs cputicks
t.Run("compare timers", func(t *testing.T) {
testenv.SkipFlaky(t, 64253)
- if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+ if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
t.Errorf("views differ by more than %fx", slop)
}
})
defer runtime.SetMutexProfileFraction(old)
needContention.Store(int64(len(mus) - 1))
- metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+ metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
defer func() {
if t.Failed() {
explain()
}
}()
// 1e-9 chance of that, which is an acceptably low flakiness rate.
const samplingSlop = 2.5
- if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+ if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
// The test imposes a delay with usleep, verified with calls to
// nanotime. Compare against the runtime/metrics package's view
// (based on nanotime) rather than runtime/pprof's view (based
// on cputicks).
t.Errorf("runtime/metrics reported much less than the expected contention duration (%fs << %fs)", have, want)
}
if have, want := n, int64(len(mus)); float64(have)*samplingSlop < float64(want) || float64(want)*samplingSlop < float64(have) {
t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
}
+ var totalProfileGrowth float64
+ for _, growth := range profileGrowth {
+ totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+ }
+
const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
- if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+ if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
t.Errorf("views differ by more than %fx", timerSlop)
}
})
})
}
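+
+// slowMarkerFrame and fastMarkerFrame exist only to contribute
+// distinguishable frames to the profiled call stacks, so the test can
+// attribute contention to the slow or fast critical section.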
+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
// contentionWorker provides cleaner call stacks for lock contention profile tests
type contentionWorker struct {
before func()