From 483533df9e35eea46747c5545307b230eca997b6 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Tue, 14 Sep 2021 10:48:18 +0800 Subject: [PATCH] runtime: using wyrand for fastrand MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit For modern 64-bit CPU architecture multiplier is faster than xorshift darwin/amd64 name old time/op new time/op delta Fastrand 2.13ns ± 1% 1.78ns ± 1% -16.47% (p=0.000 n=9+10) FastrandHashiter 32.5ns ± 4% 32.1ns ± 3% ~ (p=0.277 n=8+9) Fastrandn/2 2.16ns ± 1% 1.99ns ± 1% -7.53% (p=0.000 n=10+10) Fastrandn/3 2.13ns ± 3% 2.00ns ± 1% -5.88% (p=0.000 n=10+10) Fastrandn/4 2.08ns ± 2% 1.98ns ± 2% -4.71% (p=0.000 n=10+10) Fastrandn/5 2.08ns ± 2% 1.98ns ± 1% -4.90% (p=0.000 n=10+9) linux/mips64le name old time/op new time/op delta Fastrand 12.1ns ± 0% 10.8ns ± 1% -10.58% (p=0.000 n=8+10) FastrandHashiter 105ns ± 1% 105ns ± 1% ~ (p=0.138 n=10+10) Fastrandn/2 16.9ns ± 0% 16.4ns ± 4% -2.84% (p=0.020 n=10+10) Fastrandn/3 16.9ns ± 0% 16.4ns ± 3% -3.03% (p=0.000 n=10+10) Fastrandn/4 16.9ns ± 0% 16.5ns ± 2% -2.01% (p=0.002 n=8+10) Fastrandn/5 16.9ns ± 0% 16.4ns ± 3% -2.70% (p=0.000 n=8+10) linux/riscv64 name old time/op new time/op delta Fastrand 22.7ns ± 0% 12.7ns ±19% -44.09% (p=0.000 n=9+10) FastrandHashiter 255ns ± 4% 250ns ± 7% ~ (p=0.363 n=10+10) Fastrandn/2 31.8ns ± 2% 28.5ns ±13% -10.45% (p=0.000 n=10+10) Fastrandn/3 33.0ns ± 2% 27.4ns ± 8% -17.16% (p=0.000 n=9+10) Fastrandn/4 29.6ns ± 3% 28.2ns ± 5% -4.81% (p=0.000 n=8+9) Fastrandn/5 33.4ns ± 3% 26.5ns ± 9% -20.49% (p=0.000 n=8+10) Change-Id: I88ac69625ef923f8be66647e3361e3be986de002 Reviewed-on: https://go-review.googlesource.com/c/go/+/337350 Trust: Meng Zhuo Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Keith Randall --- src/runtime/proc.go | 11 ++++------- src/runtime/runtime2.go | 2 +- src/runtime/stubs.go | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 605e133000..b744d0e71e 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -680,10 +680,10 @@ func schedinit() { moduledataverify() stackinit() mallocinit() + cpuinit() // must run before alginit + alginit() // maps, hash, fastrand must not be used before this call fastrandinit() // must run before mcommoninit mcommoninit(_g_.m, -1) - cpuinit() // must run before alginit - alginit() // maps must not be used before this call modulesinit() // provides activeModules typelinksinit() // uses maps, activeModules itabsinit() // uses activeModules @@ -787,11 +787,8 @@ func mcommoninit(mp *m, id int64) { mp.id = mReserveID() } - mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed)) - mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed)) - if mp.fastrand[0]|mp.fastrand[1] == 0 { - mp.fastrand[1] = 1 - } + // cputicks is not very random in startup virtual machine + mp.fastrand = uint64(int64Hash(uint64(mp.id), fastrandseed^uintptr(cputicks()))) mpreinit(mp) if mp.gsignal != nil { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 271d57e5d0..04bd5cb887 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -538,7 +538,7 @@ type m struct { printlock int8 incgo bool // m is executing a cgo call freeWait uint32 // if == 0, safe to free g0 and delete m (atomic) - fastrand [2]uint32 + fastrand uint64 needextram bool traceback uint8 ncgocall uint64 // number of cgo calls in total diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index c85b7d7330..91380a0d51 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -8,6 +8,7 @@ import ( "internal/abi" "internal/goarch" "internal/goexperiment" + "runtime/internal/math" "unsafe" ) @@ -124,15 +125,27 @@ const hashLoad = float32(loadFactorNum) / float32(loadFactorDen) //go:nosplit func fastrand() uint32 { mp := getg().m + // Implement wyrand: https://github.com/wangyi-fudan/wyhash + // Only the platform that math.Mul64 can be lowered + // by the compiler should be in this list. + if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64| + goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le| + goarch.IsS390x|goarch.IsRiscv64 == 1 { + mp.fastrand += 0xa0761d6478bd642f + hi, lo := math.Mul64(mp.fastrand, mp.fastrand^0xe7037ed1a0b428db) + return uint32(hi ^ lo) + } + // Implement xorshift64+: 2 32-bit xorshift sequences added together. // Shift triplet [17,7,16] was calculated as indicated in Marsaglia's // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf // This generator passes the SmallCrush suite, part of TestU01 framework: // http://simul.iro.umontreal.ca/testu01/tu01.html - s1, s0 := mp.fastrand[0], mp.fastrand[1] + t := (*[2]uint32)(unsafe.Pointer(&mp.fastrand)) + s1, s0 := t[0], t[1] s1 ^= s1 << 17 s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16 - mp.fastrand[0], mp.fastrand[1] = s0, s1 + t[0], t[1] = s0, s1 return s0 + s1 } -- 2.50.0