// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !386
-
-// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
-
package sys
// Copied from math/bits to avoid dependence.
func TrailingZeros8(x uint8) int {
return int(ntz8tab[x])
}
+
+const len8tab = "" +
+ "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
+ "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
+ "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+ "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
+ "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+
+// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+//
+// nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts.
+//
+//go:nosplit
+func Len64(x uint64) (n int) {
+ if x >= 1<<32 {
+ x >>= 32
+ n = 32
+ }
+ if x >= 1<<16 {
+ x >>= 16
+ n += 16
+ }
+ if x >= 1<<8 {
+ x >>= 8
+ n += 8
+ }
+ return n + int(len8tab[x])
+}
+
+// --- OnesCount ---
+
+const m0 = 0x5555555555555555 // 01010101 ...
+const m1 = 0x3333333333333333 // 00110011 ...
+const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
+
+// OnesCount64 returns the number of one bits ("population count") in x.
+func OnesCount64(x uint64) int {
+ // Implementation: Parallel summing of adjacent bits.
+ // See "Hacker's Delight", Chap. 5: Counting Bits.
+ // The following pattern shows the general approach:
+ //
+ // x = x>>1&(m0&m) + x&(m0&m)
+ // x = x>>2&(m1&m) + x&(m1&m)
+ // x = x>>4&(m2&m) + x&(m2&m)
+ // x = x>>8&(m3&m) + x&(m3&m)
+ // x = x>>16&(m4&m) + x&(m4&m)
+ // x = x>>32&(m5&m) + x&(m5&m)
+ // return int(x)
+ //
+ // Masking (& operations) can be left away when there's no
+ // danger that a field's sum will carry over into the next
+ // field: Since the result cannot be > 64, 8 bits is enough
+ // and we can ignore the masks for the shifts by 8 and up.
+ // Per "Hacker's Delight", the first line can be simplified
+ // more, but it saves at best one instruction, so we leave
+ // it alone for clarity.
+ const m = 1<<64 - 1
+ x = x>>1&(m0&m) + x&(m0&m)
+ x = x>>2&(m1&m) + x&(m1&m)
+ x = (x>>4 + x) & (m2 & m)
+ x += x >> 8
+ x += x >> 16
+ x += x >> 32
+ return int(x) & (1<<7 - 1)
+}
+
+// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
+func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
+
+// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
+func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
+
+// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len8(x uint8) int {
+ return int(len8tab[x])
+}
+
+// Bswap64 returns its input with byte order reversed
+// 0x0102030405060708 -> 0x0807060504030201
+func Bswap64(x uint64) uint64 {
+ c8 := uint64(0x00ff00ff00ff00ff)
+ a := x >> 8 & c8
+ b := (x & c8) << 8
+ x = a | b
+ c16 := uint64(0x0000ffff0000ffff)
+ a = x >> 16 & c16
+ b = (x & c16) << 16
+ x = a | b
+ c32 := uint64(0x00000000ffffffff)
+ a = x >> 32 & c32
+ b = (x & c32) << 32
+ x = a | b
+ return x
+}
+
+// Bswap32 returns its input with byte order reversed
+// 0x01020304 -> 0x04030201
+func Bswap32(x uint32) uint32 {
+ c8 := uint32(0x00ff00ff)
+ a := x >> 8 & c8
+ b := (x & c8) << 8
+ x = a | b
+ c16 := uint32(0x0000ffff)
+ a = x >> 16 & c16
+ b = (x & c16) << 16
+ x = a | b
+ return x
+}
+
+// Prefetch prefetches data from memory addr to cache
+//
+// AMD64: Produce PREFETCHT0 instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1KEEP option
+func Prefetch(addr uintptr) {}
+
+// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
+// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
+//
+// AMD64: Produce PREFETCHNTA instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1STRM option
+func PrefetchStreamed(addr uintptr) {}
+++ /dev/null
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-TEXT runtime∕internal∕sys·TrailingZeros64(SB), NOSPLIT, $0-12
- // Try low 32 bits.
- MOVL x_lo+0(FP), AX
- BSFL AX, AX
- JZ tryhigh
- MOVL AX, ret+8(FP)
- RET
-
-tryhigh:
- // Try high 32 bits.
- MOVL x_hi+4(FP), AX
- BSFL AX, AX
- JZ none
- ADDL $32, AX
- MOVL AX, ret+8(FP)
- RET
-
-none:
- // No bits are set.
- MOVL $64, ret+8(FP)
- RET
-
-TEXT runtime∕internal∕sys·TrailingZeros32(SB), NOSPLIT, $0-8
- MOVL x+0(FP), AX
- BSFL AX, AX
- JNZ 2(PC)
- MOVL $32, AX
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime∕internal∕sys·TrailingZeros8(SB), NOSPLIT, $0-8
- MOVBLZX x+0(FP), AX
- BSFL AX, AX
- JNZ 2(PC)
- MOVL $8, AX
- MOVL AX, ret+4(FP)
- RET
+++ /dev/null
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sys
-
-// Copied from math/bits to avoid dependence.
-
-const len8tab = "" +
- "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
- "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
- "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
- "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
- "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
- "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
- "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
- "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
- "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
-
-// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
-//
-// nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts.
-//
-//go:nosplit
-func Len64(x uint64) (n int) {
- if x >= 1<<32 {
- x >>= 32
- n = 32
- }
- if x >= 1<<16 {
- x >>= 16
- n += 16
- }
- if x >= 1<<8 {
- x >>= 8
- n += 8
- }
- return n + int(len8tab[x])
-}
-
-// --- OnesCount ---
-
-const m0 = 0x5555555555555555 // 01010101 ...
-const m1 = 0x3333333333333333 // 00110011 ...
-const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
-
-// OnesCount64 returns the number of one bits ("population count") in x.
-func OnesCount64(x uint64) int {
- // Implementation: Parallel summing of adjacent bits.
- // See "Hacker's Delight", Chap. 5: Counting Bits.
- // The following pattern shows the general approach:
- //
- // x = x>>1&(m0&m) + x&(m0&m)
- // x = x>>2&(m1&m) + x&(m1&m)
- // x = x>>4&(m2&m) + x&(m2&m)
- // x = x>>8&(m3&m) + x&(m3&m)
- // x = x>>16&(m4&m) + x&(m4&m)
- // x = x>>32&(m5&m) + x&(m5&m)
- // return int(x)
- //
- // Masking (& operations) can be left away when there's no
- // danger that a field's sum will carry over into the next
- // field: Since the result cannot be > 64, 8 bits is enough
- // and we can ignore the masks for the shifts by 8 and up.
- // Per "Hacker's Delight", the first line can be simplified
- // more, but it saves at best one instruction, so we leave
- // it alone for clarity.
- const m = 1<<64 - 1
- x = x>>1&(m0&m) + x&(m0&m)
- x = x>>2&(m1&m) + x&(m1&m)
- x = (x>>4 + x) & (m2 & m)
- x += x >> 8
- x += x >> 16
- x += x >> 32
- return int(x) & (1<<7 - 1)
-}
-
-// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
-func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
-
-// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
-func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
-
-// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
-func Len8(x uint8) int {
- return int(len8tab[x])
-}
-
-// Bswap64 returns its input with byte order reversed
-// 0x0102030405060708 -> 0x0807060504030201
-func Bswap64(x uint64) uint64 {
- c8 := uint64(0x00ff00ff00ff00ff)
- a := x >> 8 & c8
- b := (x & c8) << 8
- x = a | b
- c16 := uint64(0x0000ffff0000ffff)
- a = x >> 16 & c16
- b = (x & c16) << 16
- x = a | b
- c32 := uint64(0x00000000ffffffff)
- a = x >> 32 & c32
- b = (x & c32) << 32
- x = a | b
- return x
-}
-
-// Bswap32 returns its input with byte order reversed
-// 0x01020304 -> 0x04030201
-func Bswap32(x uint32) uint32 {
- c8 := uint32(0x00ff00ff)
- a := x >> 8 & c8
- b := (x & c8) << 8
- x = a | b
- c16 := uint32(0x0000ffff)
- a = x >> 16 & c16
- b = (x & c16) << 16
- x = a | b
- return x
-}
-
-// Prefetch prefetches data from memory addr to cache
-//
-// AMD64: Produce PREFETCHT0 instruction
-//
-// ARM64: Produce PRFM instruction with PLDL1KEEP option
-func Prefetch(addr uintptr) {}
-
-// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
-// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
-//
-// AMD64: Produce PREFETCHNTA instruction
-//
-// ARM64: Produce PRFM instruction with PLDL1STRM option
-func PrefetchStreamed(addr uintptr) {}
+++ /dev/null
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build 386
-
-package sys
-
-func TrailingZeros64(x uint64) int
-func TrailingZeros32(x uint32) int
-func TrailingZeros8(x uint8) int