Cypherpunks repositories - gostls13.git/commitdiff
runtime: benchmark mutex handoffs
author: Rhys Hiltner <rhys.hiltner@gmail.com>
Wed, 31 Jul 2024 20:45:53 +0000 (13:45 -0700)
committer: Gopher Robot <gobot@golang.org>
Fri, 2 Aug 2024 21:00:51 +0000 (21:00 +0000)
The speed of handing off a mutex to a waiting thread is sensitive to the
configuration of the spinning section of lock2. Measure that latency
directly, to complement our existing benchmarks of mutex throughput.

For #68578

Change-Id: I7637684bcff62eb05cc008491f095f653d13af4b
Reviewed-on: https://go-review.googlesource.com/c/go/+/602176
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Rhys Hiltner <rhys.hiltner@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/runtime/runtime_test.go

index 0b51dd8c8d15b1ce3c7835c0f022b5d755dfa761..6004649ee92b31b86de1bc2385ffdc94a780f491 100644 (file)
@@ -7,6 +7,8 @@ package runtime_test
 import (
        "flag"
        "fmt"
+       "internal/cpu"
+       "internal/runtime/atomic"
        "io"
        . "runtime"
        "runtime/debug"
@@ -561,3 +563,111 @@ func BenchmarkOSYield(b *testing.B) {
                OSYield()
        }
 }
+
+func BenchmarkMutexHandoff(b *testing.B) {
+       testcase := func(delay func(l *Mutex)) func(b *testing.B) {
+               return func(b *testing.B) {
+                       if workers := 2; GOMAXPROCS(0) < workers { // one coordinator plus one worker
+                               b.Skipf("requires GOMAXPROCS >= %d", workers)
+                       }
+
+                       // Measure latency of mutex handoff between threads.
+                       //
+                       // Hand off a runtime.mutex between two threads, one running a
+                       // "coordinator" goroutine and the other running a "worker"
+                       // goroutine. We don't override the runtime's typical
+                       // goroutine/thread mapping behavior.
+                       //
+                       // Measure the latency, starting when the coordinator enters a call
+                       // to runtime.unlock and ending when the worker's call to
+                       // runtime.lock returns. The benchmark can specify a "delay"
+                       // function to simulate the length of the mutex-holder's critical
+                       // section, including to arrange for the worker's thread to be in
+                       // either the "spinning" or "sleeping" portions of the runtime.lock2
+                       // implementation. Measurement starts after any such "delay".
+                       //
+                       // The two threads' goroutines communicate their current position to
+                       // each other in a non-blocking way via the "turn" state.
+
+                       var state struct {
+                               _    [cpu.CacheLinePadSize]byte // padding; presumably keeps lock and turn on separate cache lines (confirm)
+                               lock Mutex
+                               _    [cpu.CacheLinePadSize]byte // padding between lock and turn
+                               turn atomic.Int64
+                               _    [cpu.CacheLinePadSize]byte // trailing padding
+                       }
+
+                       var delta atomic.Int64 // both goroutines add their partial sums here; the total is the summed unlock-to-lock latency in ns
+                       var wg sync.WaitGroup
+
+                       // coordinator:
+                       //  - acquire the mutex
+                       //  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
+                       //  - call delay; for SlowPingPong, that waits until the mutex is contended
+                       //    and then a bit more so the worker can commit to its sleep
+                       //  - release the mutex and wait for it to be our turn (0 mod 4) again
+                       wg.Add(1)
+                       go func() {
+                               defer wg.Done()
+                               var t int64
+                               for range b.N {
+                                       Lock(&state.lock)
+                                       state.turn.Add(2) // now 2 mod 4: the worker's turn
+                                       delay(&state.lock)
+                                       t -= Nanotime() // start the timer (subtract the start timestamp)
+                                       Unlock(&state.lock)
+                                       for state.turn.Load()&0x2 != 0 { // spin until the worker hands the turn back
+                                       }
+                               }
+                               state.turn.Add(1) // set the low bit so the worker sees an odd turn and exits
+                               delta.Add(t)
+                       }()
+
+                       // worker:
+                       //  - wait until it's our turn (2 mod 4)
+                       //  - acquire and release the mutex
+                       //  - switch the turn counter back to the coordinator (0 mod 4)
+                       wg.Add(1)
+                       go func() {
+                               defer wg.Done()
+                               var t int64
+                               for {
+                                       switch state.turn.Load() & 0x3 { // the low two bits encode whose turn it is
+                                       case 0: // coordinator's turn: keep polling
+                                       case 1, 3: // low bit set: the coordinator has finished all iterations
+                                               delta.Add(t)
+                                               return
+                                       case 2:
+                                               Lock(&state.lock)
+                                               t += Nanotime() // stop the timer (add the stop timestamp)
+                                               Unlock(&state.lock)
+                                               state.turn.Add(2) // back to 0 mod 4: the coordinator's turn
+                                       }
+                               }
+                       }()
+
+                       wg.Wait()
+                       b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op") // report the mean measured handoff latency under the "ns/op" unit
+               }
+       }
+
+       b.Run("Solo", func(b *testing.B) { // uncontended Lock/Unlock pairs on a single goroutine
+               var lock Mutex
+               for range b.N {
+                       Lock(&lock)
+                       Unlock(&lock)
+               }
+       })
+
+       b.Run("FastPingPong", testcase(func(l *Mutex) {})) // no delay: the coordinator unlocks right after signaling the worker
+       b.Run("SlowPingPong", testcase(func(l *Mutex) {
+               // Wait for the worker to stop spinning and prepare to sleep
+               for !MutexContended(l) {
+               }
+               // Wait a bit longer so the OS can finish committing the worker to its
+               // sleep. Balance consistency against getting enough iterations.
+               const extraNs = 10e3 // 10 microseconds, expressed in nanoseconds
+               for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
+               }
+       }))
+}