From: Rhys Hiltner
Date: Mon, 28 Oct 2024 19:21:33 +0000 (-0700)
Subject: runtime: add test for mutex starvation
X-Git-Tag: go1.24rc1~389
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9fe70bcd65b1380d53d68c9653f973efe8e2657f;p=gostls13.git

runtime: add test for mutex starvation

When multiple threads all need to acquire the same runtime.mutex, make
sure that none of them has to wait for too long. Measure how long a
single thread can capture the mutex, and how long individual other
threads must go between having a turn with the mutex.

For #68578

Change-Id: I56ecc551232f9c2730c128a9f8eeb7bd45c2d3b5
Reviewed-on: https://go-review.googlesource.com/c/go/+/622995
Auto-Submit: Rhys Hiltner
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Michael Knyszek
Reviewed-by: Cherry Mui
---

diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index c24f725c0e..f23581acbe 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -10,6 +10,7 @@ import (
 	"internal/cpu"
 	"internal/runtime/atomic"
 	"io"
+	"math/bits"
 	. "runtime"
 	"runtime/debug"
 	"slices"
@@ -606,6 +607,111 @@ func BenchmarkMutexContention(b *testing.B) {
 	wg.Wait()
 }
 
+func BenchmarkMutexCapture(b *testing.B) {
+
+	// Measure mutex fairness.
+	//
+	// Have several threads contend for a single mutex value. Measure how
+	// effectively a single thread is able to capture the lock and report the
+	// duration of those "streak" events. Measure how long other individual
+	// threads need to wait between their turns with the lock. Report the
+	// duration of those "starve" events.
+	//
+	// Report in terms of wall clock time (assuming a constant time per
+	// lock/unlock pair) rather than number of locks/unlocks. This keeps
+	// timekeeping overhead out of the critical path, and avoids giving an
+	// advantage to lock/unlock implementations that take less time per
+	// operation.
+
+	var state struct {
+		_     cpu.CacheLinePad
+		lock  Mutex
+		_     cpu.CacheLinePad
+		count atomic.Int64
+		_     cpu.CacheLinePad
+	}
+
+	procs := GOMAXPROCS(0)
+	var wg sync.WaitGroup
+	histograms := make(chan [2][65]int)
+	for range procs {
+		wg.Add(1)
+		go func() {
+			var (
+				prev      int64
+				streak    int64
+				histogram [2][65]int
+			)
+			for {
+				Lock(&state.lock)
+				ours := state.count.Add(1)
+				Unlock(&state.lock)
+				delta := ours - prev - 1
+				prev = ours
+				if delta == 0 {
+					streak++
+				} else {
+					histogram[0][bits.LeadingZeros64(uint64(streak))]++
+					histogram[1][bits.LeadingZeros64(uint64(delta))]++
+					streak = 1
+				}
+				if ours >= int64(b.N) {
+					wg.Done()
+					if delta == 0 {
+						histogram[0][bits.LeadingZeros64(uint64(streak))]++
+						histogram[1][bits.LeadingZeros64(uint64(delta))]++
+					}
+					histograms <- histogram
+					return
+				}
+			}
+		}()
+	}
+
+	wg.Wait()
+	b.StopTimer()
+
+	var histogram [2][65]int
+	for range procs {
+		h := <-histograms
+		for i := range h {
+			for j := range h[i] {
+				histogram[i][j] += h[i][j]
+			}
+		}
+	}
+
+	percentile := func(h [65]int, p float64) int {
+		sum := 0
+		for i, v := range h {
+			bound := uint64(1<<63) >> i
+			sum += int(bound) * v
+		}
+
+		// Imagine that the longest streak / starvation events were instead half
+		// as long but twice in number. (Note that we've pre-multiplied by the
+		// [lower] "bound" value.) Continue those splits until we meet the
+		// percentile target.
+		part := 0
+		for i, v := range h {
+			bound := uint64(1<<63) >> i
+			part += int(bound) * v
+			// have we trimmed off enough at the head to dip below the percentile goal
+			if float64(sum-part) < float64(sum)*p {
+				return int(bound)
+			}
+		}
+
+		return 0
+	}
+
+	perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
+	b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
+	b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
+	b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
+	b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
+}
+
 func BenchmarkMutexHandoff(b *testing.B) {
 	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
 		return func(b *testing.B) {
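
The percentile helper above is easier to follow with concrete numbers. The standalone sketch below is not part of the commit: it copies the helper's bucket-by-bits.LeadingZeros64 and lower-bound-weighting scheme into an ordinary program and runs it on made-up counts (one streak of 8 consecutive acquisitions and 100 streaks of length 1), showing why p100 reports the longest streak's bucket while p90 can fall through to a much smaller one.

// Illustrative only; the histogram contents below are hypothetical.
package main

import (
	"fmt"
	"math/bits"
)

// percentile mirrors the closure in BenchmarkMutexCapture: bucket i covers
// values in [2^(63-i), 2^(64-i)), every event is weighted by that lower
// bound, and buckets are trimmed from the largest end until less than
// fraction p of the total weight remains.
func percentile(h [65]int, p float64) int {
	sum := 0
	for i, v := range h {
		bound := uint64(1<<63) >> i
		sum += int(bound) * v
	}
	part := 0
	for i, v := range h {
		bound := uint64(1<<63) >> i
		part += int(bound) * v
		if float64(sum-part) < float64(sum)*p {
			return int(bound)
		}
	}
	return 0
}

func main() {
	var h [65]int
	h[bits.LeadingZeros64(8)] = 1   // one streak of 8 consecutive acquisitions
	h[bits.LeadingZeros64(1)] = 100 // 100 streaks of length 1

	// Total weight is 8*1 + 1*100 = 108. p100 stops at the first non-empty
	// bucket, the streak of 8. For p90, trimming the weight-8 bucket removes
	// only 8/108 of the total, which does not dip below 90%, so the search
	// continues down to the length-1 bucket.
	fmt.Println(percentile(h, 1.0)) // prints 8
	fmt.Println(percentile(h, 0.9)) // prints 1
}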