From 9515cb511a1210e013c26354ea09e786acd61365 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Tue, 28 Feb 2017 10:20:32 -0800 Subject: [PATCH] math/bits: faster TrailingZeroes8 For sizes > 8, the existing code is faster. benchmark old ns/op new ns/op delta BenchmarkTrailingZeros8-8 1.95 1.29 -33.85% Measured on 2.3 GHz Intel Core i7 running macOS 10.12.3. Change-Id: I6f3a33ec633a2c544ec29693c141f2f99335c745 Reviewed-on: https://go-review.googlesource.com/37581 Reviewed-by: Brad Fitzpatrick --- src/math/bits/bits.go | 2 +- src/math/bits/bits_impl.go | 8 -------- src/math/bits/bits_tables.go | 19 +++++++++++++++++++ src/math/bits/make_tables.go | 9 +++++++++ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go index 24aa7c5f3f..1aaa9eea9d 100644 --- a/src/math/bits/bits.go +++ b/src/math/bits/bits.go @@ -34,7 +34,7 @@ func LeadingZeros64(x uint64) int { return 64 - blen(uint64(x)) } func TrailingZeros(x uint) int { return ntz(x) } // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0. -func TrailingZeros8(x uint8) int { return ntz8(x) } +func TrailingZeros8(x uint8) int { return int(ntz8tab[x]) } // TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0. func TrailingZeros16(x uint16) int { return ntz16(x) } diff --git a/src/math/bits/bits_impl.go b/src/math/bits/bits_impl.go index e7c1a8a5dc..cf5a12af2b 100644 --- a/src/math/bits/bits_impl.go +++ b/src/math/bits/bits_impl.go @@ -23,14 +23,6 @@ var deBruijn32tab = [32]byte{ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, } -func ntz8(x uint8) (n int) { - if x == 0 { - return 8 - } - // see comment in ntz64 - return int(deBruijn32tab[uint32(x&-x)*deBruijn32>>(32-5)]) -} - func ntz16(x uint16) (n int) { if x == 0 { return 16 diff --git a/src/math/bits/bits_tables.go b/src/math/bits/bits_tables.go index 2c4e31b796..f79f83a01e 100644 --- a/src/math/bits/bits_tables.go +++ b/src/math/bits/bits_tables.go @@ -6,6 +6,25 @@ package bits +var ntz8tab = [256]uint8{ + 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, +} + var pop8tab = [256]uint8{ 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, diff --git a/src/math/bits/make_tables.go b/src/math/bits/make_tables.go index 7127bdf413..c66afb5a96 100644 --- a/src/math/bits/make_tables.go +++ b/src/math/bits/make_tables.go @@ -30,6 +30,7 @@ package bits func main() { buf := bytes.NewBuffer(header) + gen(buf, "ntz8tab", ntz8) gen(buf, "pop8tab", pop8) gen(buf, "rev8tab", rev8) // add more tables as needed @@ -58,6 +59,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) { fmt.Fprint(w, "\n}\n\n") } +func ntz8(x uint8) (n uint8) { + for x&1 == 0 && n < 8 { + x >>= 1 + n++ + } + return +} + func pop8(x uint8) (n uint8) { for x != 0 { x &= x - 1 -- 2.50.0