]> Cypherpunks repositories - gostls13.git/commitdiff
math/big: slight improvement to algorithm used for internal bitLen function
authorDavid G. Andersen <dave.andersen@gmail.com>
Mon, 23 Jan 2012 21:46:28 +0000 (13:46 -0800)
committerRobert Griesemer <gri@golang.org>
Mon, 23 Jan 2012 21:46:28 +0000 (13:46 -0800)
The bitLen function currently shifts out blocks of 8 bits at a time.
This change replaces this sorta-linear algorithm with a log(N)
one (shift out 16 bits, then 8, then 4, then 2, then 1).
I left the start of it linear at 16 bits at a time so that
the function continues to work with 32 or 64 bit values
without any funkiness.
The algorithm is similar to several of the nlz ("number of
leading zeros") algorithms from "Hacker's Delight" or the
"bit twiddling hacks" pages.

Doesn't make a big difference to the existing benchmarks, but
I'm using the code in a different context that calls bitLen
much more often, so it seemed worthwhile making the existing
codebase faster so that it's a better building block.

Microbenchmark results on a 64-bit Macbook Pro using 6g from weekly.2012-01-20:

benchmark                old ns/op    new ns/op    delta
big.BenchmarkBitLen0             4            6  +50.12%
big.BenchmarkBitLen1             4            6  +33.91%
big.BenchmarkBitLen2             6            6   +3.05%
big.BenchmarkBitLen3             7            6  -19.05%
big.BenchmarkBitLen4             9            6  -30.19%
big.BenchmarkBitLen5            11            6  -42.23%
big.BenchmarkBitLen8            16            6  -61.78%
big.BenchmarkBitLen9             5            6  +18.29%
big.BenchmarkBitLen16           18            7  -60.99%
big.BenchmarkBitLen17            7            6   -4.64%
big.BenchmarkBitLen31           19            7  -62.49%

On an ARM machine (with the previous weekly):

benchmark                old ns/op    new ns/op    delta
big.BenchmarkBitLen0            37           50  +36.56%
big.BenchmarkBitLen1            59           51  -13.69%
big.BenchmarkBitLen2            74           59  -20.40%
big.BenchmarkBitLen3            92           60  -34.89%
big.BenchmarkBitLen4           110           59  -46.09%
big.BenchmarkBitLen5           127           60  -52.68%
big.BenchmarkBitLen8           181           59  -67.24%
big.BenchmarkBitLen9            78           60  -23.05%
big.BenchmarkBitLen16          199           69  -65.13%
big.BenchmarkBitLen17           91           70  -23.17%
big.BenchmarkBitLen31          210           95  -54.43%

R=golang-dev, dave, edsrzf, gri
CC=golang-dev
https://golang.org/cl/5570044

src/pkg/math/big/arith.go
src/pkg/math/big/arith_test.go

index 0a02a4ef59bd106e7e0006da8ea893a2cf79b7cc..f30951ef0f6c409e28c0659b22a0c132f4e38e3e 100644 (file)
@@ -80,10 +80,22 @@ func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
 
 // Length of x in bits.
 func bitLen(x Word) (n int) {
-       for ; x >= 0x100; x >>= 8 {
+       for ; x >= 0x8000; x >>= 16 {
+               n += 16
+       }
+       if x >= 0x80 {
+               x >>= 8
                n += 8
        }
-       for ; x > 0; x >>= 1 {
+       if x >= 0x8 {
+               x >>= 4
+               n += 4
+       }
+       if x >= 0x2 {
+               x >>= 2
+               n += 2
+       }
+       if x >= 0x1 {
                n++
        }
        return
index b6c56c39ef4756a65ec1dbee6f76aec203c3e498..106cd92d856ce7caa5318f4e3fe1188bbe0c9a9e 100644 (file)
@@ -333,3 +333,25 @@ func TestMulAddWWW(t *testing.T) {
                }
        }
 }
+
+// runs b.N iterations of bitLen called on a Word containing (1 << nbits)-1.
+func benchmarkBitLenN(b *testing.B, nbits uint) {
+       testword := Word((uint64(1) << nbits) - 1)
+       for i := 0; i < b.N; i++ {
+               bitLen(testword)
+       }
+}
+
+// Individual bitLen tests.  Numbers chosen to examine both sides
+// of powers-of-two boundaries.
+func BenchmarkBitLen0(b *testing.B)  { benchmarkBitLenN(b, 0) }
+func BenchmarkBitLen1(b *testing.B)  { benchmarkBitLenN(b, 1) }
+func BenchmarkBitLen2(b *testing.B)  { benchmarkBitLenN(b, 2) }
+func BenchmarkBitLen3(b *testing.B)  { benchmarkBitLenN(b, 3) }
+func BenchmarkBitLen4(b *testing.B)  { benchmarkBitLenN(b, 4) }
+func BenchmarkBitLen5(b *testing.B)  { benchmarkBitLenN(b, 5) }
+func BenchmarkBitLen8(b *testing.B)  { benchmarkBitLenN(b, 8) }
+func BenchmarkBitLen9(b *testing.B)  { benchmarkBitLenN(b, 9) }
+func BenchmarkBitLen16(b *testing.B) { benchmarkBitLenN(b, 16) }
+func BenchmarkBitLen17(b *testing.B) { benchmarkBitLenN(b, 17) }
+func BenchmarkBitLen31(b *testing.B) { benchmarkBitLenN(b, 31) }