]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: speed up cheaprand and cheaprand64
authorGavin Lam <gavin.oss@tutamail.com>
Fri, 23 Jan 2026 03:44:50 +0000 (03:44 +0000)
committerGopher Robot <gobot@golang.org>
Fri, 23 Jan 2026 22:41:52 +0000 (14:41 -0800)
The current cheaprand performs 128-bit multiplication on 64-bit numbers
and truncate the result to 32 bits, which is inefficient.

A 32-bit specific implementation is more performant because it performs
64-bit multiplication on 32-bit numbers instead.

The current cheaprand64 involves two cheaprand calls.
Implementing it as 64-bit wyrand is significantly faster.

Since cheaprand64 discards one bit, I have preserved this behavior.
The underlying uint64 function is made available as cheaprandu64.

               │    old      │                new           │
               │   sec/op    │   sec/op     vs base         │
Cheaprand-8      1.358n ± 0%   1.218n ± 0%  -10.31% (n=100)
Cheaprand64-8    2.424n ± 0%   1.391n ± 0%  -42.62% (n=100)
Blocksampled-8   8.347n ± 0%   2.022n ± 0%  -75.78% (n=100)

Fixes #77149

Change-Id: Ib0b5da4a642cd34d0401b03c1d343041f8230d11
GitHub-Last-Rev: 549d8d407e2bbcaecdee0b52cbf3a513dda637fb
GitHub-Pull-Request: golang/go#77150
Reviewed-on: https://go-review.googlesource.com/c/go/+/735480
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/runtime/export_test.go
src/runtime/lock_spinbit.go
src/runtime/mprof.go
src/runtime/mprof_test.go [new file with mode: 0644]
src/runtime/rand.go
src/runtime/rand_test.go
src/runtime/runtime2.go

index 4f6ef9a3f262c4289b1f6dd9b62f94a50727381a..fb0bfd3904a9cdabc100a7fbdd247360e1a60d60 100644 (file)
@@ -273,6 +273,10 @@ func CountPagesInUse() (pagesInUse, counted uintptr) {
        return
 }
 
+func Blocksampled(cycles, rate int64) bool { return blocksampled(cycles, rate) }
+
+func Cheaprand() uint32         { return cheaprand() }
+func Cheaprand64() int64        { return cheaprand64() }
 func Fastrand() uint32          { return uint32(rand()) }
 func Fastrand64() uint64        { return rand() }
 func Fastrandn(n uint32) uint32 { return randn(n) }
index 039ea6f5656a8aa45bac2100790c1a2c2179a60f..590be8a0c51f4d90080da2966edca61ced4bb305 100644 (file)
@@ -327,16 +327,8 @@ func unlock2(l *mutex) {
 // mutexSampleContention returns whether the current mutex operation should
 // report any contention it discovers.
 func mutexSampleContention() bool {
-       if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
-               return false
-       } else {
-               // TODO: have SetMutexProfileFraction do the clamping
-               rate32 := uint32(rate)
-               if int64(rate32) != rate {
-                       rate32 = ^uint32(0)
-               }
-               return cheaprandn(rate32) == 0
-       }
+       rate := atomic.Load64(&mutexprofilerate)
+       return rate > 0 && cheaprandu64()%rate == 0
 }
 
 // unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
index 0452b1b605cb74a3e1e1f102ae497de5c56d127a..a4cfef72fa99a25b9b8d236ea47f480ae50bb5d6 100644 (file)
@@ -696,8 +696,8 @@ func (prof *mLockProfile) recordUnlock(cycles int64) {
                if cycles == 0 {
                        return
                }
-               prevScore := uint64(cheaprand64()) % uint64(prev)
-               thisScore := uint64(cheaprand64()) % uint64(cycles)
+               prevScore := cheaprandu64() % uint64(prev)
+               thisScore := cheaprandu64() % uint64(cycles)
                if prevScore > thisScore {
                        prof.cyclesLost += cycles
                        return
diff --git a/src/runtime/mprof_test.go b/src/runtime/mprof_test.go
new file mode 100644 (file)
index 0000000..d819c11
--- /dev/null
@@ -0,0 +1,16 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+       . "runtime"
+       "testing"
+)
+
+func BenchmarkBlocksampled(b *testing.B) {
+       for b.Loop() {
+               Blocksampled(42, 1337)
+       }
+}
index a30845b585e74f06f4c702828bc7f0552cec9110..353e0e0f42471f727a4476eb7a81a0d9c9d59564 100644 (file)
@@ -192,7 +192,8 @@ func mrandinit(mp *m) {
        }
        bootstrapRandReseed() // erase key we just extracted
        mp.chacha8.Init64(seed)
-       mp.cheaprand = rand()
+       mp.cheaprand = uint32(rand())
+       mp.cheaprand64 = rand()
 }
 
 // randn is like rand() % n but faster.
@@ -227,14 +228,12 @@ func randn(n uint32) uint32 {
 func cheaprand() uint32 {
        mp := getg().m
        // Implement wyrand: https://github.com/wangyi-fudan/wyhash
-       // Only the platform that bits.Mul64 can be lowered
-       // by the compiler should be in this list.
-       if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
-               goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
-               goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 {
-               mp.cheaprand += 0xa0761d6478bd642f
-               hi, lo := bits.Mul64(mp.cheaprand, mp.cheaprand^0xe7037ed1a0b428db)
-               return uint32(hi ^ lo)
+       // Only the platform that supports 64-bit multiplication
+       // natively should be allowed.
+       if bits.UintSize == 64 {
+               mp.cheaprand += 0x53c5ca59
+               hi, lo := bits.Mul32(mp.cheaprand, mp.cheaprand^0x74743c1b)
+               return hi ^ lo
        }
 
        // Implement xorshift64+: 2 32-bit xorshift sequences added together.
@@ -242,7 +241,7 @@ func cheaprand() uint32 {
        // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
        // This generator passes the SmallCrush suite, part of TestU01 framework:
        // http://simul.iro.umontreal.ca/testu01/tu01.html
-       t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand))
+       t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand64))
        s1, s0 := t[0], t[1]
        s1 ^= s1 << 17
        s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
@@ -269,7 +268,33 @@ func cheaprand() uint32 {
 //go:linkname cheaprand64
 //go:nosplit
 func cheaprand64() int64 {
-       return int64(cheaprand())<<31 ^ int64(cheaprand())
+       return int64(cheaprandu64() & ^(uint64(1) << 63))
+}
+
+// cheaprandu64 is a non-cryptographic-quality 64-bit random generator
+// suitable for calling at very high frequency (such as during sampling decisions).
+// it is "cheap" in the sense of both expense and quality.
+//
+// cheaprandu64 must not be exported to other packages:
+// the rule is that other packages using runtime-provided
+// randomness must always use rand.
+//
+//go:nosplit
+func cheaprandu64() uint64 {
+       // Implement wyrand: https://github.com/wangyi-fudan/wyhash
+       // Only the platform that bits.Mul64 can be lowered
+       // by the compiler should be in this list.
+       if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
+               goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
+               goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 {
+               mp := getg().m
+               // Implement wyrand: https://github.com/wangyi-fudan/wyhash
+               mp.cheaprand64 += 0xa0761d6478bd642f
+               hi, lo := bits.Mul64(mp.cheaprand64, mp.cheaprand64^0xe7037ed1a0b428db)
+               return hi ^ lo
+       }
+
+       return uint64(cheaprand())<<32 | uint64(cheaprand())
 }
 
 // cheaprandn is like cheaprand() % n but faster.
index baecb6984d02500a78b07b7b9258c005f6977ea3..79b888fcfac508146258ddd22f248be5eeefbe8a 100644 (file)
@@ -22,6 +22,18 @@ func TestReadRandom(t *testing.T) {
        }
 }
 
+func BenchmarkCheaprand(b *testing.B) {
+       for b.Loop() {
+               Cheaprand()
+       }
+}
+
+func BenchmarkCheaprand64(b *testing.B) {
+       for b.Loop() {
+               Cheaprand64()
+       }
+}
+
 func BenchmarkFastrand(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
                for pb.Next() {
index be33932b24d16010920a8f9abb97e87c121ec49f..bf47955d0d6b96ab2dea5aec8148a9636eb5d057 100644 (file)
@@ -715,8 +715,9 @@ type m struct {
 
        mOS
 
-       chacha8   chacha8rand.State
-       cheaprand uint64
+       chacha8     chacha8rand.State
+       cheaprand   uint32
+       cheaprand64 uint64
 
        // Up to 10 locks held by this m, maintained by the lock ranking code.
        locksHeldLen int