Cypherpunks repositories - gostls13.git/commitdiff
runtime: speed up fastrand() % n
author Josh Bleecher Snyder <josharian@gmail.com>
Mon, 13 Feb 2017 20:46:17 +0000 (12:46 -0800)
committer Josh Bleecher Snyder <josharian@gmail.com>
Tue, 14 Feb 2017 00:01:22 +0000 (00:01 +0000)
This occurs a fair amount in the runtime for non-power-of-two n.
Use an alternative, faster formulation.

name           old time/op  new time/op  delta
Fastrandn/2-8  4.45ns ± 2%  2.09ns ± 3%  -53.12%  (p=0.000 n=14+14)
Fastrandn/3-8  4.78ns ±11%  2.06ns ± 2%  -56.94%  (p=0.000 n=15+15)
Fastrandn/4-8  4.76ns ± 9%  1.99ns ± 3%  -58.28%  (p=0.000 n=15+13)
Fastrandn/5-8  4.96ns ±13%  2.03ns ± 6%  -59.14%  (p=0.000 n=15+15)

name                    old time/op  new time/op  delta
SelectUncontended-8     33.7ns ± 2%  33.9ns ± 2%  +0.70%  (p=0.000 n=49+50)
SelectSyncContended-8   1.68µs ± 4%  1.65µs ± 4%  -1.54%  (p=0.000 n=50+45)
SelectAsyncContended-8   282ns ± 1%   277ns ± 1%  -1.50%  (p=0.000 n=48+43)
SelectNonblock-8        5.31ns ± 1%  5.32ns ± 1%    ~     (p=0.275 n=45+44)
SelectProdCons-8         585ns ± 3%   577ns ± 2%  -1.35%  (p=0.000 n=50+50)
GoroutineSelect-8       1.59ms ± 2%  1.59ms ± 1%    ~     (p=0.084 n=49+48)

Updates #16213

Change-Id: Ib555a4d7da2042a25c3976f76a436b536487d5b7
Reviewed-on: https://go-review.googlesource.com/36932
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/runtime/export_test.go
src/runtime/mgc.go
src/runtime/proc.go
src/runtime/rand_test.go
src/runtime/select.go
src/runtime/stubs.go
src/runtime/symtab.go

index 5f85d91f5e42fcf42c36096afaaf9f2d5b0ad6bc..985cd7f851016ad49cfdf4f4e09e31eb2eb3e0a3 100644 (file)
@@ -246,4 +246,5 @@ func CountPagesInUse() (pagesInUse, counted uintptr) {
        return
 }
 
-func Fastrand() uint32 { return fastrand() }
+func Fastrand() uint32          { return fastrand() }
+func Fastrandn(n uint32) uint32 { return fastrandn(n) }
index 527df1750ae1368fd94d36c5134f32bd7b42ca0b..cb0d305899b70ca1c7d19456401019f41d9b9cdb 100644 (file)
@@ -648,7 +648,7 @@ func (c *gcControllerState) enlistWorker() {
        }
        myID := gp.m.p.ptr().id
        for tries := 0; tries < 5; tries++ {
-               id := int32(fastrand() % uint32(gomaxprocs-1))
+               id := int32(fastrandn(uint32(gomaxprocs - 1)))
                if id >= myID {
                        id++
                }
index 23626f19a965e7da2657d60d96cf9cfbe0c13535..e71ebcd7a7306e54378273ba1cc8b62f1ced87ad 100644 (file)
@@ -4280,7 +4280,7 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
 
        if randomizeScheduler {
                for i := uint32(1); i <= n; i++ {
-                       j := fastrand() % (i + 1)
+                       j := fastrandn(i + 1)
                        batch[i], batch[j] = batch[j], batch[i]
                }
        }
index 0f6ec0f2ecb6c5c3ad925780ebbf02078a593e7f..f8831b05f9fd62303755a5363886a919d1f7fdbb 100644 (file)
@@ -6,6 +6,7 @@ package runtime_test
 
 import (
        . "runtime"
+       "strconv"
        "testing"
 )
 
@@ -30,3 +31,15 @@ func BenchmarkFastrandHashiter(b *testing.B) {
                }
        })
 }
+
+var sink32 uint32
+
+func BenchmarkFastrandn(b *testing.B) {
+       for n := uint32(2); n <= 5; n++ {
+               b.Run(strconv.Itoa(int(n)), func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               sink32 = Fastrandn(n)
+                       }
+               })
+       }
+}
index 4a744a19678953a0cb30b77c779f328cc6adbb8b..1ace6dc5c3cd1bb5c47eff48a2075ee3c0983bb4 100644 (file)
@@ -270,7 +270,7 @@ func selectgoImpl(sel *hselect) (uintptr, uint16) {
        pollslice := slice{unsafe.Pointer(sel.pollorder), int(sel.ncase), int(sel.ncase)}
        pollorder := *(*[]uint16)(unsafe.Pointer(&pollslice))
        for i := 1; i < int(sel.ncase); i++ {
-               j := fastrand() % uint32(i+1)
+               j := fastrandn(uint32(i + 1))
                pollorder[i] = pollorder[j]
                pollorder[j] = uint16(i)
        }
index e839c59d55f29b3fbcd0c8417c4fb8af4bff2c4a..ff230b8e555659cff9ee7cc8323815497976c863 100644 (file)
@@ -103,6 +103,13 @@ func fastrand() uint32 {
        return fr
 }
 
+//go:nosplit
+func fastrandn(n uint32) uint32 {
+       // This is similar to fastrand() % n, but faster.
+       // See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+       return uint32(uint64(fastrand()) * uint64(n) >> 32)
+}
+
 //go:linkname sync_fastrand sync.fastrand
 func sync_fastrand() uint32 { return fastrand() }
 
index ed82783ca969f328c2ec11ad0e1ffd35710b2203..377d970f09ef383cefeb7938beef888c6b4a2a50 100644 (file)
@@ -549,7 +549,7 @@ func pcvalue(f *_func, off int32, targetpc uintptr, cache *pcvalueCache, strict
                        // a recursive stack's cycle is slightly
                        // larger than the cache.
                        if cache != nil {
-                               ci := fastrand() % uint32(len(cache.entries))
+                               ci := fastrandn(uint32(len(cache.entries)))
                                cache.entries[ci] = pcvalueCacheEnt{
                                        targetpc: targetpc,
                                        off:      off,