From 1f4db9dbd6b743ffd1f3be350649ddebaad695c1 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 6 Jun 2023 12:44:46 -0400 Subject: [PATCH] math/rand/v2: update benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Change the benchmarks to use the result of the calls, as I found that in certain cases inlining resulted in discarding part of the computation in the benchmark loop. Add various benchmarks that will be relevant in future CLs. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.amd64 │ │ sec/op │ SourceUint64-32 1.555n ± 1% GlobalInt64-32 2.071n ± 1% GlobalInt63Parallel-32 0.1023n ± 1% GlobalUint64-32 5.193n ± 1% GlobalUint64Parallel-32 0.2341n ± 0% Int64-32 2.056n ± 2% Uint64-32 2.077n ± 2% GlobalIntN1000-32 4.077n ± 2% IntN1000-32 3.476n ± 2% Int64N1000-32 3.059n ± 1% Int64N1e8-32 2.942n ± 1% Int64N1e9-32 2.932n ± 1% Int64N2e9-32 2.925n ± 1% Int64N1e18-32 3.116n ± 1% Int64N2e18-32 4.067n ± 1% Int64N4e18-32 4.054n ± 1% Int32N1000-32 2.951n ± 1% Int32N1e8-32 3.102n ± 1% Int32N1e9-32 3.535n ± 1% Int32N2e9-32 3.514n ± 1% Float32-32 2.760n ± 1% Float64-32 2.284n ± 1% ExpFloat64-32 3.757n ± 1% NormFloat64-32 3.837n ± 1% Perm3-32 35.23n ± 2% Perm30-32 208.8n ± 1% Perm30ViaShuffle-32 111.7n ± 1% ShuffleOverhead-32 101.1n ± 1% Concurrent-32 2.108n ± 7% goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 220860f76f.arm64 │ │ sec/op │ SourceUint64-8 2.316n ± 1% GlobalInt64-8 2.183n ± 1% GlobalInt63Parallel-8 0.4331n ± 0% GlobalUint64-8 4.377n ± 2% GlobalUint64Parallel-8 0.9237n ± 0% Int64-8 2.538n ± 1% Uint64-8 2.604n ± 1% GlobalIntN1000-8 3.857n ± 2% IntN1000-8 3.822n ± 2% Int64N1000-8 3.318n ± 0% Int64N1e8-8 3.349n ± 1% Int64N1e9-8 3.317n ± 2% Int64N2e9-8 3.317n ± 2% Int64N1e18-8 3.542n ± 1% Int64N2e18-8 5.087n ± 0% Int64N4e18-8 5.084n ± 0% Int32N1000-8 3.208n ± 2% Int32N1e8-8 3.610n ± 1% Int32N1e9-8 4.235n ± 0% Int32N2e9-8 4.229n ± 1% Float32-8 3.468n ± 0% Float64-8 3.447n ± 0% ExpFloat64-8 4.567n ± 0% NormFloat64-8 4.821n ± 0% Perm3-8 28.89n ± 0% Perm30-8 175.7n ± 0% Perm30ViaShuffle-8 153.5n ± 0% ShuffleOverhead-8 119.8n ± 1% Concurrent-8 2.433n ± 3% goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.386 │ │ sec/op │ SourceUint64-32 2.370n ± 1% GlobalInt64-32 3.569n ± 1% GlobalInt63Parallel-32 0.3221n ± 1% GlobalUint64-32 8.797n ± 10% GlobalUint64Parallel-32 0.6351n ± 0% Int64-32 2.612n ± 2% Uint64-32 3.350n ± 1% GlobalIntN1000-32 5.892n ± 1% IntN1000-32 4.546n ± 1% Int64N1000-32 14.59n ± 1% Int64N1e8-32 14.76n ± 2% Int64N1e9-32 16.57n ± 1% Int64N2e9-32 14.54n ± 1% Int64N1e18-32 16.14n ± 1% Int64N2e18-32 18.10n ± 1% Int64N4e18-32 18.65n ± 1% Int32N1000-32 3.560n ± 1% Int32N1e8-32 3.770n ± 2% Int32N1e9-32 4.098n ± 0% Int32N2e9-32 4.179n ± 1% Float32-32 21.18n ± 4% Float64-32 20.60n ± 2% ExpFloat64-32 13.07n ± 0% NormFloat64-32 7.738n ± 2% Perm3-32 36.73n ± 1% Perm30-32 211.9n ± 1% Perm30ViaShuffle-32 165.2n ± 1% ShuffleOverhead-32 133.9n ± 1% Concurrent-32 3.287n ± 2% For #61716. Change-Id: I2f0938eae4b7bf736a8cd899a99783e731bf2179 Reviewed-on: https://go-review.googlesource.com/c/go/+/502496 Auto-Submit: Russ Cox Reviewed-by: Dmitri Shuralyov Reviewed-by: Rob Pike LUCI-TryBot-Result: Go LUCI --- src/math/rand/v2/rand_test.go | 250 ++++++++++++++++++++++++++++++---- 1 file changed, 223 insertions(+), 27 deletions(-) diff --git a/src/math/rand/v2/rand_test.go b/src/math/rand/v2/rand_test.go index ddb4418935..ab7fb56796 100644 --- a/src/math/rand/v2/rand_test.go +++ b/src/math/rand/v2/rand_test.go @@ -13,6 +13,7 @@ import ( "os" "runtime" "sync" + "sync/atomic" "testing" ) @@ -355,7 +356,7 @@ func TestFloat32(t *testing.T) { num /= 100 // 1.72 seconds instead of 172 seconds } - r := New(NewSource(1)) + r := testRand() for ct := 0; ct < num; ct++ { f := r.Float32() if f >= 1 { @@ -366,7 +367,7 @@ func TestFloat32(t *testing.T) { func TestShuffleSmall(t *testing.T) { // Check that Shuffle allows n=0 and n=1, but that swap is never called for them. - r := New(NewSource(1)) + r := testRand() for n := 0; n <= 1; n++ { r.Shuffle(n, func(i, j int) { t.Fatalf("swap called, n=%d i=%d j=%d", n, i, j) }) } @@ -473,94 +474,289 @@ func TestUniformFactorial(t *testing.T) { // Benchmarks -func BenchmarkInt64Threadsafe(b *testing.B) { +var Sink uint64 + +func testRand() *Rand { + return New(NewSource(1)) +} + +func BenchmarkSourceUint64(b *testing.B) { + s := NewSource(1).(Source64) + var t uint64 + for n := b.N; n > 0; n-- { + t += s.Uint64() + } + Sink = uint64(t) +} + +func BenchmarkGlobalInt64(b *testing.B) { + var t int64 for n := b.N; n > 0; n-- { - Int64() + t += Int64() } + Sink = uint64(t) } -func BenchmarkInt64ThreadsafeParallel(b *testing.B) { +func BenchmarkGlobalInt63Parallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { + var t int64 for pb.Next() { - Int64() + t += Int64() } + atomic.AddUint64(&Sink, uint64(t)) }) } -func BenchmarkInt64Unthreadsafe(b *testing.B) { - r := New(NewSource(1)) +func BenchmarkGlobalUint64(b *testing.B) { + var t uint64 for n := b.N; n > 0; n-- { - r.Int64() + t += Uint64() } + Sink = t +} + +func BenchmarkGlobalUint64Parallel(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + var t uint64 + for pb.Next() { + t += Uint64() + } + atomic.AddUint64(&Sink, t) + }) +} + +func BenchmarkInt64(b *testing.B) { + r := testRand() + var t int64 + for n := b.N; n > 0; n-- { + t += r.Int64() + } + Sink = uint64(t) +} + +var AlwaysFalse = false + +func keep[T int | uint | int32 | uint32 | int64 | uint64](x T) T { + if AlwaysFalse { + return -x + } + return x +} + +func BenchmarkUint64(b *testing.B) { + r := testRand() + var t uint64 + for n := b.N; n > 0; n-- { + t += r.Uint64() + } + Sink = t +} + +func BenchmarkGlobalIntN1000(b *testing.B) { + var t int + arg := keep(1000) + for n := b.N; n > 0; n-- { + t += IntN(arg) + } + Sink = uint64(t) } func BenchmarkIntN1000(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int + arg := keep(1000) for n := b.N; n > 0; n-- { - r.IntN(1000) + t += r.IntN(arg) } + Sink = uint64(t) } func BenchmarkInt64N1000(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int64 + arg := keep(int64(1000)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt64N1e8(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(1e8)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt64N1e9(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(1e9)) for n := b.N; n > 0; n-- { - r.Int64N(1000) + t += r.Int64N(arg) } + Sink = uint64(t) +} + +func BenchmarkInt64N2e9(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(2e9)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt64N1e18(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(1e18)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt64N2e18(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(2e18)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt64N4e18(b *testing.B) { + r := testRand() + var t int64 + arg := keep(int64(4e18)) + for n := b.N; n > 0; n-- { + t += r.Int64N(arg) + } + Sink = uint64(t) } func BenchmarkInt32N1000(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int32 + arg := keep(int32(1000)) for n := b.N; n > 0; n-- { - r.Int32N(1000) + t += r.Int32N(arg) } + Sink = uint64(t) +} + +func BenchmarkInt32N1e8(b *testing.B) { + r := testRand() + var t int32 + arg := keep(int32(1e8)) + for n := b.N; n > 0; n-- { + t += r.Int32N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt32N1e9(b *testing.B) { + r := testRand() + var t int32 + arg := keep(int32(1e9)) + for n := b.N; n > 0; n-- { + t += r.Int32N(arg) + } + Sink = uint64(t) +} + +func BenchmarkInt32N2e9(b *testing.B) { + r := testRand() + var t int32 + arg := keep(int32(2e9)) + for n := b.N; n > 0; n-- { + t += r.Int32N(arg) + } + Sink = uint64(t) } func BenchmarkFloat32(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t float32 for n := b.N; n > 0; n-- { - r.Float32() + t += r.Float32() } + Sink = uint64(t) } func BenchmarkFloat64(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t float64 for n := b.N; n > 0; n-- { - r.Float64() + t += r.Float64() } + Sink = uint64(t) +} + +func BenchmarkExpFloat64(b *testing.B) { + r := testRand() + var t float64 + for n := b.N; n > 0; n-- { + t += r.ExpFloat64() + } + Sink = uint64(t) +} + +func BenchmarkNormFloat64(b *testing.B) { + r := testRand() + var t float64 + for n := b.N; n > 0; n-- { + t += r.NormFloat64() + } + Sink = uint64(t) } func BenchmarkPerm3(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int for n := b.N; n > 0; n-- { - r.Perm(3) + t += r.Perm(3)[0] } + Sink = uint64(t) + } func BenchmarkPerm30(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int for n := b.N; n > 0; n-- { - r.Perm(30) + t += r.Perm(30)[0] } + Sink = uint64(t) } func BenchmarkPerm30ViaShuffle(b *testing.B) { - r := New(NewSource(1)) + r := testRand() + var t int for n := b.N; n > 0; n-- { p := make([]int, 30) for i := range p { p[i] = i } r.Shuffle(30, func(i, j int) { p[i], p[j] = p[j], p[i] }) + t += p[0] } + Sink = uint64(t) } // BenchmarkShuffleOverhead uses a minimal swap function // to measure just the shuffling overhead. func BenchmarkShuffleOverhead(b *testing.B) { - r := New(NewSource(1)) + r := testRand() for n := b.N; n > 0; n-- { - r.Shuffle(52, func(i, j int) { - if i < 0 || i >= 52 || j < 0 || j >= 52 { + r.Shuffle(30, func(i, j int) { + if i < 0 || i >= 30 || j < 0 || j >= 30 { b.Fatalf("bad swap(%d, %d)", i, j) } }) -- 2.50.0