runtime: verify attribution of mutex delay
author    Rhys Hiltner <rhys.hiltner@gmail.com>
          Thu, 16 May 2024 22:48:36 +0000 (15:48 -0700)
committer Gopher Robot <gobot@golang.org>
          Wed, 7 May 2025 19:22:19 +0000 (12:22 -0700)
Have the test use the same clock (cputicks) as the profiler, and use the
test's own measurements as hard bounds on the magnitude to expect in the
profile.
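
The profiler reports contention in nanoseconds while cputicks counts CPU
cycles, so the test converts its tick measurements using the
cycles-per-second estimate exported from the runtime (see the
export_test.go change below). A minimal standalone sketch of that
conversion; the tick rate and delta here are made-up numbers, not values
taken from the runtime:

    // Sketch: converting a cputicks delta to nanoseconds the way the test
    // does, via the profiler's cycles-per-second estimate. The constants
    // below are illustrative only.
    package main

    import "fmt"

    func ticksToNanos(ticks, cyclesPerSecond int64) int64 {
            cpuGHz := float64(cyclesPerSecond) / 1e9 // ticks per nanosecond
            return int64(float64(ticks) / cpuGHz)
    }

    func main() {
            const assumedCyclesPerSecond = 3_000_000_000 // pretend a 3 GHz tick rate
            // 1500 ticks at 3 ticks/ns is roughly 500ns of contention.
            fmt.Println(ticksToNanos(1500, assumedCyclesPerSecond))
    }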

Compare the depiction of two users of the same lock: one where the
critical section is fast, one where it is slow. Confirm that the profile
shows the slow critical section as a large source of delay (with #66999
fixed), rather than showing the fast critical section as a large
recipient of delay.
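
Concretely, for each critical-section flavor the profile's reported delay
must land between two bounds the test measures itself: at least the time
that flavor was known to hold the lock while the other worker was
contended, and at most the total time the other worker spent inside
runtime.lock. A hedged, self-contained sketch of that check; the helper
name and the nanosecond figures are invented for illustration, while the
real test derives its bounds from cputicks measurements:

    // Sketch of the bound check applied to each critical section's profile
    // entry. checkAttribution and the sample numbers are hypothetical.
    package main

    import "fmt"

    func checkAttribution(name string, profNanos, minNanos, maxNanos int64) error {
            if profNanos < minNanos {
                    return fmt.Errorf("profile undercounted %q critical section", name)
            }
            if profNanos > maxNanos {
                    return fmt.Errorf("profile overcounted %q critical section", name)
            }
            return nil
    }

    func main() {
            // The slow critical section should absorb most of the blame;
            // the fast one should stay near its own small contribution.
            fmt.Println(checkAttribution("slow", 380_000, 350_000, 450_000)) // <nil>
            fmt.Println(checkAttribution("fast", 90_000, 80_000, 120_000))   // <nil>
    }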

Previously reviewed as https://go.dev/cl/586237.

For #66999

Change-Id: Ic2d78cc29153d5322577d84abdc448e95ed8f594
Reviewed-on: https://go-review.googlesource.com/c/go/+/667616
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Rhys Hiltner <rhys.hiltner@gmail.com>

src/runtime/export_test.go
src/runtime/metrics_test.go

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 980066df700d107d218d1d0bf30729a60c40cf9f..520b060599ae9b8ba819ad80d018105e5b856384 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -40,6 +40,8 @@ var Atoi32 = atoi32
 var ParseByteCount = parseByteCount
 
 var Nanotime = nanotime
+var Cputicks = cputicks
+var CyclesPerSecond = pprof_cyclesPerSecond
 var NetpollBreak = netpollBreak
 var Usleep = usleep
 
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index a036f37b975e0e249808671e8b2c99f2c3d6b2a0..178dd968f72b56a50c6283ca967829060b13739d 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -1020,9 +1020,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                return metricGrowth, profileGrowth, p
        }
 
-       testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-               return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-                       metricGrowth, profileGrowth, p := measureDelta(t, func() {
+       testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+               return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+                       metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
                                var started, stopped sync.WaitGroup
                                started.Add(workers)
                                stopped.Add(workers)
@@ -1042,7 +1042,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                                stopped.Wait()
                        })
 
-                       if profileGrowth == 0 {
+                       if totalProfileGrowth == 0 {
                                t.Errorf("no increase in mutex profile")
                        }
                        if metricGrowth == 0 && strictTiming {
@@ -1062,7 +1062,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                        // together. It doesn't work as well for user-space contention, where the
                        // involved goroutines are not _Grunnable the whole time and so need to pass
                        // through the scheduler.
-                       t.Logf("lock contention growth in runtime/pprof's view  (%fs)", profileGrowth)
+                       t.Logf("lock contention growth in runtime/pprof's view  (%fs)", totalProfileGrowth)
                        t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
 
                        acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1082,7 +1082,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                        }
 
                        var stks [][]string
-                       values := make([][2]int64, len(acceptStacks))
+                       values := make([][2]int64, len(acceptStacks)+1)
                        for _, s := range p.Sample {
                                var have []string
                                for _, loc := range s.Location {
@@ -1091,16 +1091,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                                        }
                                }
                                stks = append(stks, have)
+                               found := false
                                for i, stk := range acceptStacks {
                                        if slices.Equal(have, stk) {
                                                values[i][0] += s.Value[0]
                                                values[i][1] += s.Value[1]
+                                               found = true
+                                               break
                                        }
                                }
+                               if !found {
+                                       values[len(values)-1][0] += s.Value[0]
+                                       values[len(values)-1][1] += s.Value[1]
+                               }
                        }
+                       profileGrowth = make([]int64, len(acceptStacks)+1)
+                       profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
                        for i, stk := range acceptStacks {
                                n += values[i][0]
                                value += values[i][1]
+                               profileGrowth[i] = values[i][1]
                                t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
                        }
                        if n == 0 && value == 0 {
@@ -1122,11 +1132,6 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
        name := t.Name()
 
        t.Run("runtime.lock", func(t *testing.T) {
-               mus := make([]runtime.Mutex, 200)
-               var needContention atomic.Int64
-               delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
-               delayMicros := delay.Microseconds()
-
                // The goroutine that acquires the lock will only proceed when it
                // detects that its partner is contended for the lock. That will lead to
                // live-lock if anything (such as a STW) prevents the partner goroutine
@@ -1136,11 +1141,29 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                // will end up contended twice. Instead, disable the GC.
                defer debug.SetGCPercent(debug.SetGCPercent(-1))
 
-               const workers = 2
-               if runtime.GOMAXPROCS(0) < workers {
-                       t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
+               mus := make([]runtime.Mutex, 200)
+               var needContention atomic.Int64
+
+               baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+               fastDelayMicros := baseDelay.Microseconds()
+               slowDelayMicros := baseDelay.Microseconds() * 4
+
+               const (
+                       fastRole = iota
+                       slowRole
+                       workerCount
+               )
+               if runtime.GOMAXPROCS(0) < workerCount {
+                       t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
                }
 
+               minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
+               maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
+               for i := range minTicks {
+                       minTicks[i] = make([]int64, len(mus))
+                       maxTicks[i] = make([]int64, len(mus))
+               }
+               var id atomic.Int32
                fn := func() bool {
                        n := int(needContention.Load())
                        if n < 0 {
@@ -1148,34 +1171,77 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                        }
                        mu := &mus[n]
 
-                       runtime.Lock(mu)
-                       for int(needContention.Load()) == n {
-                               if runtime.MutexContended(mu) {
-                                       // make them wait a little while
-                                       for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-                                               runtime.Usleep(uint32(delayMicros))
+                       // Each worker has a role: to have a fast or slow critical section.
+                       // Rotate the role assignments as we step through the mutex slice so
+                       // we don't end up with one M always claiming the same kind of work.
+                       id := int(id.Add(1))
+                       role := (id + n) % workerCount
+
+                       marker, delayMicros := fastMarkerFrame, fastDelayMicros
+                       if role == slowRole {
+                               marker, delayMicros = slowMarkerFrame, slowDelayMicros
+                       }
+
+                       // Each lock is used by two different critical sections, one fast
+                       // and one slow, identified in profiles by their different "marker"
+                       // functions. We expect the profile to blame each for the amount of
+                       // delay it inflicts on other users of the lock. We run one worker
+                       // of each kind, so any contention in one would be due to the other.
+                       //
+                       // We measure how long our runtime.lock call takes, which sets an
+                       // upper bound on how much blame to expect for the other worker type
+                       // in the profile. And if we acquire the lock first, we wait for the
+                       // other worker to announce its contention. We measure the
+                       // known-contended time, to use as a lower bound on how much blame
+                       // we expect of ourselves in the profile. Then we stall for a little
+                       // while (different amounts for "fast" versus "slow") before
+                       // unlocking the mutex.
+
+                       marker(func() {
+                               t0 := runtime.Cputicks()
+                               runtime.Lock(mu)
+                               maxTicks[role][n] = runtime.Cputicks() - t0
+                               minTicks[role][n] = 0
+                               for int(needContention.Load()) == n {
+                                       if runtime.MutexContended(mu) {
+                                               t1 := runtime.Cputicks()
+                                               // make them wait a little while
+                                               for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
+                                                       runtime.Usleep(uint32(1 + delayMicros/8))
+                                               }
+                                               minTicks[role][n] = runtime.Cputicks() - t1
+                                               break
                                        }
-                                       break
                                }
-                       }
-                       runtime.Unlock(mu)
-                       needContention.Store(int64(n - 1))
+                               runtime.Unlock(mu)
+                               needContention.Store(int64(n - 1))
+                       })
 
                        return true
                }
 
-               stks := [][]string{{
-                       "runtime.unlock",
-                       "runtime_test." + name + ".func5.1",
-                       "runtime_test.(*contentionWorker).run",
-               }}
+               stks := make([][]string, 2)
+               for i := range stks {
+                       marker := "runtime_test.fastMarkerFrame"
+                       if i == slowRole {
+                               marker = "runtime_test.slowMarkerFrame"
+                       }
+
+                       stks[i] = []string{
+                               "runtime.unlock",
+                               "runtime_test." + name + ".func5.1.1",
+                               marker,
+                               "runtime_test." + name + ".func5.1",
+                               "runtime_test.(*contentionWorker).run",
+                       }
+               }
 
                t.Run("sample-1", func(t *testing.T) {
                        old := runtime.SetMutexProfileFraction(1)
                        defer runtime.SetMutexProfileFraction(old)
 
                        needContention.Store(int64(len(mus) - 1))
-                       metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+                       metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
                        defer func() {
                                if t.Failed() {
                                        explain()
@@ -1187,7 +1253,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                                // gTrackingPeriod, so we don't have a hard lower bound here.
                                testenv.SkipFlaky(t, 64253)
 
-                               if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+                               if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
                                        // The test imposes a delay with usleep, verified with calls to
                                        // nanotime. Compare against the runtime/metrics package's view
                                        // (based on nanotime) rather than runtime/pprof's view (based
@@ -1199,10 +1265,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                                t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
                        }
 
+                       var slowMinTicks, fastMinTicks int64
+                       for role, ticks := range minTicks {
+                               for _, delta := range ticks {
+                                       if role == slowRole {
+                                               slowMinTicks += delta
+                                       } else {
+                                               fastMinTicks += delta
+                                       }
+                               }
+                       }
+                       var slowMaxTicks, fastMaxTicks int64
+                       for role, ticks := range maxTicks {
+                               for _, delta := range ticks {
+                                       if role == slowRole {
+                                               slowMaxTicks += delta
+                                       } else {
+                                               fastMaxTicks += delta
+                                       }
+                               }
+                       }
+
+                       cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
+                       for _, set := range []struct {
+                               name     string
+                               profTime int64
+                               minTime  int64
+                               maxTime  int64
+                       }{
+                               {
+                                       name:     "slow",
+                                       profTime: profileGrowth[slowRole],
+                                       minTime:  int64(float64(slowMinTicks) / cpuGHz),
+                                       maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
+                               },
+                               {
+                                       name:     "fast",
+                                       profTime: profileGrowth[fastRole],
+                                       minTime:  int64(float64(fastMinTicks) / cpuGHz),
+                                       maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
+                               },
+                       } {
+                               t.Logf("profile's view of delays due to %q critical section:                 %dns", set.name, set.profTime)
+                               t.Logf("test's view of known-contended time within %q critical section:      %dns", set.name, set.minTime)
+                               t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+                               if set.profTime < set.minTime {
+                                       t.Errorf("profile undercounted %q critical section", set.name)
+                               }
+                               if set.profTime > set.maxTime {
+                                       t.Errorf("profile overcounted %q critical section", set.name)
+                               }
+                       }
+
+                       var totalProfileGrowth float64
+                       for _, growth := range profileGrowth {
+                               totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+                       }
+
                        const slop = 1.5 // account for nanotime vs cputicks
                        t.Run("compare timers", func(t *testing.T) {
                                testenv.SkipFlaky(t, 64253)
-                               if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+                               if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
                                        t.Errorf("views differ by more than %fx", slop)
                                }
                        })
@@ -1215,7 +1339,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                        defer runtime.SetMutexProfileFraction(old)
 
                        needContention.Store(int64(len(mus) - 1))
-                       metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+                       metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
                        defer func() {
                                if t.Failed() {
                                        explain()
@@ -1228,7 +1352,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                        // 1e-9 chance of that, which is an acceptably low flakiness rate.
                        const samplingSlop = 2.5
 
-                       if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+                       if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
                                // The test imposes a delay with usleep, verified with calls to
                                // nanotime. Compare against the runtime/metrics package's view
                                // (based on nanotime) rather than runtime/pprof's view (based
@@ -1239,8 +1363,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                                t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
                        }
 
+                       var totalProfileGrowth float64
+                       for _, growth := range profileGrowth {
+                               totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+                       }
+
                        const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-                       if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+                       if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
                                t.Errorf("views differ by more than %fx", timerSlop)
                        }
                })
@@ -1307,6 +1436,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
        })
 }
 
+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
 // contentionWorker provides cleaner call stacks for lock contention profile tests
 type contentionWorker struct {
        before func()