From 1f4db9dbd6b743ffd1f3be350649ddebaad695c1 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@golang.org>
Date: Tue, 6 Jun 2023 12:44:46 -0400
Subject: [PATCH] math/rand/v2: update benchmarks
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Change the benchmarks to use the result of the calls,
as I found that in certain cases inlining resulted in
discarding part of the computation in the benchmark loop.
Add various benchmarks that will be relevant in future CLs.

goos: linux
goarch: amd64
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        â 220860f76f.amd64 â
                        â      sec/op      â
SourceUint64-32                1.555n Â± 1%
GlobalInt64-32                 2.071n Â± 1%
GlobalInt63Parallel-32        0.1023n Â± 1%
GlobalUint64-32                5.193n Â± 1%
GlobalUint64Parallel-32       0.2341n Â± 0%
Int64-32                       2.056n Â± 2%
Uint64-32                      2.077n Â± 2%
GlobalIntN1000-32              4.077n Â± 2%
IntN1000-32                    3.476n Â± 2%
Int64N1000-32                  3.059n Â± 1%
Int64N1e8-32                   2.942n Â± 1%
Int64N1e9-32                   2.932n Â± 1%
Int64N2e9-32                   2.925n Â± 1%
Int64N1e18-32                  3.116n Â± 1%
Int64N2e18-32                  4.067n Â± 1%
Int64N4e18-32                  4.054n Â± 1%
Int32N1000-32                  2.951n Â± 1%
Int32N1e8-32                   3.102n Â± 1%
Int32N1e9-32                   3.535n Â± 1%
Int32N2e9-32                   3.514n Â± 1%
Float32-32                     2.760n Â± 1%
Float64-32                     2.284n Â± 1%
ExpFloat64-32                  3.757n Â± 1%
NormFloat64-32                 3.837n Â± 1%
Perm3-32                       35.23n Â± 2%
Perm30-32                      208.8n Â± 1%
Perm30ViaShuffle-32            111.7n Â± 1%
ShuffleOverhead-32             101.1n Â± 1%
Concurrent-32                  2.108n Â± 7%

goos: darwin
goarch: arm64
pkg: math/rand/v2
cpu: Apple M1
                       â 220860f76f.arm64 â
                       â      sec/op      â
SourceUint64-8                2.316n Â± 1%
GlobalInt64-8                 2.183n Â± 1%
GlobalInt63Parallel-8        0.4331n Â± 0%
GlobalUint64-8                4.377n Â± 2%
GlobalUint64Parallel-8       0.9237n Â± 0%
Int64-8                       2.538n Â± 1%
Uint64-8                      2.604n Â± 1%
GlobalIntN1000-8              3.857n Â± 2%
IntN1000-8                    3.822n Â± 2%
Int64N1000-8                  3.318n Â± 0%
Int64N1e8-8                   3.349n Â± 1%
Int64N1e9-8                   3.317n Â± 2%
Int64N2e9-8                   3.317n Â± 2%
Int64N1e18-8                  3.542n Â± 1%
Int64N2e18-8                  5.087n Â± 0%
Int64N4e18-8                  5.084n Â± 0%
Int32N1000-8                  3.208n Â± 2%
Int32N1e8-8                   3.610n Â± 1%
Int32N1e9-8                   4.235n Â± 0%
Int32N2e9-8                   4.229n Â± 1%
Float32-8                     3.468n Â± 0%
Float64-8                     3.447n Â± 0%
ExpFloat64-8                  4.567n Â± 0%
NormFloat64-8                 4.821n Â± 0%
Perm3-8                       28.89n Â± 0%
Perm30-8                      175.7n Â± 0%
Perm30ViaShuffle-8            153.5n Â± 0%
ShuffleOverhead-8             119.8n Â± 1%
Concurrent-8                  2.433n Â± 3%

goos: linux
goarch: 386
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        â 220860f76f.386 â
                        â     sec/op     â
SourceUint64-32             2.370n Â±  1%
GlobalInt64-32              3.569n Â±  1%
GlobalInt63Parallel-32     0.3221n Â±  1%
GlobalUint64-32             8.797n Â± 10%
GlobalUint64Parallel-32    0.6351n Â±  0%
Int64-32                    2.612n Â±  2%
Uint64-32                   3.350n Â±  1%
GlobalIntN1000-32           5.892n Â±  1%
IntN1000-32                 4.546n Â±  1%
Int64N1000-32               14.59n Â±  1%
Int64N1e8-32                14.76n Â±  2%
Int64N1e9-32                16.57n Â±  1%
Int64N2e9-32                14.54n Â±  1%
Int64N1e18-32               16.14n Â±  1%
Int64N2e18-32               18.10n Â±  1%
Int64N4e18-32               18.65n Â±  1%
Int32N1000-32               3.560n Â±  1%
Int32N1e8-32                3.770n Â±  2%
Int32N1e9-32                4.098n Â±  0%
Int32N2e9-32                4.179n Â±  1%
Float32-32                  21.18n Â±  4%
Float64-32                  20.60n Â±  2%
ExpFloat64-32               13.07n Â±  0%
NormFloat64-32              7.738n Â±  2%
Perm3-32                    36.73n Â±  1%
Perm30-32                   211.9n Â±  1%
Perm30ViaShuffle-32         165.2n Â±  1%
ShuffleOverhead-32          133.9n Â±  1%
Concurrent-32               3.287n Â±  2%

For #61716.

Change-Id: I2f0938eae4b7bf736a8cd899a99783e731bf2179
Reviewed-on: https://go-review.googlesource.com/c/go/+/502496
Auto-Submit: Russ Cox <rsc@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Rob Pike <r@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
---
 src/math/rand/v2/rand_test.go | 250 ++++++++++++++++++++++++++++++----
 1 file changed, 223 insertions(+), 27 deletions(-)

diff --git a/src/math/rand/v2/rand_test.go b/src/math/rand/v2/rand_test.go
index ddb4418935..ab7fb56796 100644
--- a/src/math/rand/v2/rand_test.go
+++ b/src/math/rand/v2/rand_test.go
@@ -13,6 +13,7 @@ import (
 	"os"
 	"runtime"
 	"sync"
+	"sync/atomic"
 	"testing"
 )
 
@@ -355,7 +356,7 @@ func TestFloat32(t *testing.T) {
 		num /= 100 // 1.72 seconds instead of 172 seconds
 	}
 
-	r := New(NewSource(1))
+	r := testRand()
 	for ct := 0; ct < num; ct++ {
 		f := r.Float32()
 		if f >= 1 {
@@ -366,7 +367,7 @@ func TestFloat32(t *testing.T) {
 
 func TestShuffleSmall(t *testing.T) {
 	// Check that Shuffle allows n=0 and n=1, but that swap is never called for them.
-	r := New(NewSource(1))
+	r := testRand()
 	for n := 0; n <= 1; n++ {
 		r.Shuffle(n, func(i, j int) { t.Fatalf("swap called, n=%d i=%d j=%d", n, i, j) })
 	}
@@ -473,94 +474,289 @@ func TestUniformFactorial(t *testing.T) {
 
 // Benchmarks
 
-func BenchmarkInt64Threadsafe(b *testing.B) {
+var Sink uint64
+
+func testRand() *Rand {
+	return New(NewSource(1))
+}
+
+func BenchmarkSourceUint64(b *testing.B) {
+	s := NewSource(1).(Source64)
+	var t uint64
+	for n := b.N; n > 0; n-- {
+		t += s.Uint64()
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkGlobalInt64(b *testing.B) {
+	var t int64
 	for n := b.N; n > 0; n-- {
-		Int64()
+		t += Int64()
 	}
+	Sink = uint64(t)
 }
 
-func BenchmarkInt64ThreadsafeParallel(b *testing.B) {
+func BenchmarkGlobalInt63Parallel(b *testing.B) {
 	b.RunParallel(func(pb *testing.PB) {
+		var t int64
 		for pb.Next() {
-			Int64()
+			t += Int64()
 		}
+		atomic.AddUint64(&Sink, uint64(t))
 	})
 }
 
-func BenchmarkInt64Unthreadsafe(b *testing.B) {
-	r := New(NewSource(1))
+func BenchmarkGlobalUint64(b *testing.B) {
+	var t uint64
 	for n := b.N; n > 0; n-- {
-		r.Int64()
+		t += Uint64()
 	}
+	Sink = t
+}
+
+func BenchmarkGlobalUint64Parallel(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		var t uint64
+		for pb.Next() {
+			t += Uint64()
+		}
+		atomic.AddUint64(&Sink, t)
+	})
+}
+
+func BenchmarkInt64(b *testing.B) {
+	r := testRand()
+	var t int64
+	for n := b.N; n > 0; n-- {
+		t += r.Int64()
+	}
+	Sink = uint64(t)
+}
+
+var AlwaysFalse = false
+
+func keep[T int | uint | int32 | uint32 | int64 | uint64](x T) T {
+	if AlwaysFalse {
+		return -x
+	}
+	return x
+}
+
+func BenchmarkUint64(b *testing.B) {
+	r := testRand()
+	var t uint64
+	for n := b.N; n > 0; n-- {
+		t += r.Uint64()
+	}
+	Sink = t
+}
+
+func BenchmarkGlobalIntN1000(b *testing.B) {
+	var t int
+	arg := keep(1000)
+	for n := b.N; n > 0; n-- {
+		t += IntN(arg)
+	}
+	Sink = uint64(t)
 }
 
 func BenchmarkIntN1000(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int
+	arg := keep(1000)
 	for n := b.N; n > 0; n-- {
-		r.IntN(1000)
+		t += r.IntN(arg)
 	}
+	Sink = uint64(t)
 }
 
 func BenchmarkInt64N1000(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int64
+	arg := keep(int64(1000))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N1e8(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(1e8))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N1e9(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(1e9))
 	for n := b.N; n > 0; n-- {
-		r.Int64N(1000)
+		t += r.Int64N(arg)
 	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N2e9(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(2e9))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N1e18(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(1e18))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N2e18(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(2e18))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt64N4e18(b *testing.B) {
+	r := testRand()
+	var t int64
+	arg := keep(int64(4e18))
+	for n := b.N; n > 0; n-- {
+		t += r.Int64N(arg)
+	}
+	Sink = uint64(t)
 }
 
 func BenchmarkInt32N1000(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int32
+	arg := keep(int32(1000))
 	for n := b.N; n > 0; n-- {
-		r.Int32N(1000)
+		t += r.Int32N(arg)
 	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt32N1e8(b *testing.B) {
+	r := testRand()
+	var t int32
+	arg := keep(int32(1e8))
+	for n := b.N; n > 0; n-- {
+		t += r.Int32N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt32N1e9(b *testing.B) {
+	r := testRand()
+	var t int32
+	arg := keep(int32(1e9))
+	for n := b.N; n > 0; n-- {
+		t += r.Int32N(arg)
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkInt32N2e9(b *testing.B) {
+	r := testRand()
+	var t int32
+	arg := keep(int32(2e9))
+	for n := b.N; n > 0; n-- {
+		t += r.Int32N(arg)
+	}
+	Sink = uint64(t)
 }
 
 func BenchmarkFloat32(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t float32
 	for n := b.N; n > 0; n-- {
-		r.Float32()
+		t += r.Float32()
 	}
+	Sink = uint64(t)
 }
 
 func BenchmarkFloat64(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t float64
 	for n := b.N; n > 0; n-- {
-		r.Float64()
+		t += r.Float64()
 	}
+	Sink = uint64(t)
+}
+
+func BenchmarkExpFloat64(b *testing.B) {
+	r := testRand()
+	var t float64
+	for n := b.N; n > 0; n-- {
+		t += r.ExpFloat64()
+	}
+	Sink = uint64(t)
+}
+
+func BenchmarkNormFloat64(b *testing.B) {
+	r := testRand()
+	var t float64
+	for n := b.N; n > 0; n-- {
+		t += r.NormFloat64()
+	}
+	Sink = uint64(t)
 }
 
 func BenchmarkPerm3(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int
 	for n := b.N; n > 0; n-- {
-		r.Perm(3)
+		t += r.Perm(3)[0]
 	}
+	Sink = uint64(t)
+
 }
 
 func BenchmarkPerm30(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int
 	for n := b.N; n > 0; n-- {
-		r.Perm(30)
+		t += r.Perm(30)[0]
 	}
+	Sink = uint64(t)
 }
 
 func BenchmarkPerm30ViaShuffle(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
+	var t int
 	for n := b.N; n > 0; n-- {
 		p := make([]int, 30)
 		for i := range p {
 			p[i] = i
 		}
 		r.Shuffle(30, func(i, j int) { p[i], p[j] = p[j], p[i] })
+		t += p[0]
 	}
+	Sink = uint64(t)
 }
 
 // BenchmarkShuffleOverhead uses a minimal swap function
 // to measure just the shuffling overhead.
 func BenchmarkShuffleOverhead(b *testing.B) {
-	r := New(NewSource(1))
+	r := testRand()
 	for n := b.N; n > 0; n-- {
-		r.Shuffle(52, func(i, j int) {
-			if i < 0 || i >= 52 || j < 0 || j >= 52 {
+		r.Shuffle(30, func(i, j int) {
+			if i < 0 || i >= 30 || j < 0 || j >= 30 {
 				b.Fatalf("bad swap(%d, %d)", i, j)
 			}
 		})
-- 
2.52.0