From: Robert Griesemer Date: Sat, 18 Feb 2017 19:14:35 +0000 (-0800) Subject: math/bits: faster OnesCount X-Git-Tag: go1.9beta1~1500 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=177dfba1120d2d5976bb5fb5a68bf20bb6ca9ada;p=gostls13.git math/bits: faster OnesCount Using some additional suggestions per "Hacker's Delight". Added documentation and extra tests. Measured on 1.7 GHz Intel Core i7, running macOS 10.12.3. benchmark old ns/op new ns/op delta BenchmarkOnesCount-4 7.34 5.38 -26.70% BenchmarkOnesCount8-4 2.03 1.98 -2.46% BenchmarkOnesCount16-4 2.56 2.50 -2.34% BenchmarkOnesCount32-4 2.98 2.39 -19.80% BenchmarkOnesCount64-4 4.22 2.96 -29.86% Change-Id: I566b0ef766e55cf5776b1662b6016024ebe5d878 Reviewed-on: https://go-review.googlesource.com/37223 Reviewed-by: Matthew Dempsky Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot --- diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go index cec8afcdee..97186adc94 100644 --- a/src/math/bits/bits.go +++ b/src/math/bits/bits.go @@ -50,7 +50,6 @@ const m1 = 0x3333333333333333 // 00110011 ... const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... const m3 = 0x00ff00ff00ff00ff // etc. const m4 = 0x0000ffff0000ffff -const m5 = 0x00000000ffffffff // OnesCount returns the number of one bits ("population count") in x. func OnesCount(x uint) int { @@ -65,7 +64,8 @@ func OnesCount8(x uint8) int { const m = 1<<8 - 1 x = x>>1&(m0&m) + x&(m0&m) x = x>>2&(m1&m) + x&(m1&m) - return int(x>>4 + x&(m2&m)) + x += x >> 4 + return int(x) & (1<<4 - 1) } // OnesCount16 returns the number of one bits ("population count") in x. @@ -73,8 +73,9 @@ func OnesCount16(x uint16) int { const m = 1<<16 - 1 x = x>>1&(m0&m) + x&(m0&m) x = x>>2&(m1&m) + x&(m1&m) - x = x>>4&(m2&m) + x&(m2&m) - return int(x>>8 + x&(m3&m)) + x = (x>>4 + x) & (m2 & m) + x += x >> 8 + return int(x) & (1<<5 - 1) } // OnesCount32 returns the number of one bits ("population count") in x. @@ -82,20 +83,41 @@ func OnesCount32(x uint32) int { const m = 1<<32 - 1 x = x>>1&(m0&m) + x&(m0&m) x = x>>2&(m1&m) + x&(m1&m) - x = x>>4&(m2&m) + x&(m2&m) - x = x>>8&(m3&m) + x&(m3&m) - return int(x>>16 + x&(m4&m)) + x = (x>>4 + x) & (m2 & m) + x += x >> 8 + x += x >> 16 + return int(x) & (1<<6 - 1) } // OnesCount64 returns the number of one bits ("population count") in x. func OnesCount64(x uint64) int { + // Implementation: Parallel summing of adjacent bits. + // See "Hacker's Delight", Chap. 5: Counting Bits. + // The following pattern shows the general approach: + // + // x = x>>1&(m0&m) + x&(m0&m) + // x = x>>2&(m1&m) + x&(m1&m) + // x = x>>4&(m2&m) + x&(m2&m) + // x = x>>8&(m3&m) + x&(m3&m) + // x = x>>16&(m4&m) + x&(m4&m) + // x = x>>32&(m5&m) + x&(m5&m) + // return int(x) + // + // Masking (& operations) can be left away when there's no + // danger that a field's sum will carry over into the next + // field: Since the result cannot be > 64, 8 bits is enough + // and we can ignore the masks for the shifts by 8 and up. + // Per "Hacker's Delight", the first line can be simplified + // more, but it saves at best one instruction, so we leave + // it alone for clarity. const m = 1<<64 - 1 x = x>>1&(m0&m) + x&(m0&m) x = x>>2&(m1&m) + x&(m1&m) - x = x>>4&(m2&m) + x&(m2&m) - x = x>>8&(m3&m) + x&(m3&m) - x = x>>16&(m4&m) + x&(m4&m) - return int(x>>32 + x&(m5&m)) + x = (x>>4 + x) & (m2 & m) + x += x >> 8 + x += x >> 16 + x += x >> 32 + return int(x) & (1<<7 - 1) } // --- RotateLeft --- diff --git a/src/math/bits/bits_test.go b/src/math/bits/bits_test.go index c74e58ebde..50045c246e 100644 --- a/src/math/bits/bits_test.go +++ b/src/math/bits/bits_test.go @@ -232,48 +232,61 @@ func BenchmarkTrailingZeros64(b *testing.B) { } func TestOnesCount(t *testing.T) { + var x uint64 + for i := 0; i <= 64; i++ { + testOnesCount(t, x, i) + x = x<<1 | 1 + } + + for i := 64; i >= 0; i-- { + testOnesCount(t, x, i) + x = x << 1 + } + for i := 0; i < 256; i++ { - want := tab[i].pop for k := 0; k < 64-8; k++ { - x := uint64(i) << uint(k) - if x <= 1<<8-1 { - got := OnesCount8(uint8(x)) - if got != want { - t.Fatalf("OnesCount8(%#02x) == %d; want %d", x, got, want) - } - } + testOnesCount(t, uint64(i)<