From: Cuong Manh Le Date: Thu, 16 Feb 2023 07:35:25 +0000 (+0700) Subject: hash/maphash: add purego implementation X-Git-Tag: go1.21rc1~1431 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9b221f9350b555a1e8c01c08a88ed6590ca11a17;p=gostls13.git hash/maphash: add purego implementation Fixes #47342 Change-Id: I9c3b36de3e7c85d3e7aff2f84abc78a1b382008c Reviewed-on: https://go-review.googlesource.com/c/go/+/468795 Run-TryBot: Cuong Manh Le TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Reviewed-by: Dmitri Shuralyov Auto-Submit: Cuong Manh Le Reviewed-by: Cherry Mui --- diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go index a906c0dbdb..a52457efcc 100644 --- a/src/cmd/dist/test.go +++ b/src/cmd/dist/test.go @@ -655,6 +655,12 @@ func (t *tester) registerTests() { tags: []string{"osusergo"}, pkg: "os/user", }) + t.registerTest("purego:hash/maphash", "hash/maphash purego implementation", + &goTest{ + timeout: 300 * time.Second, + tags: []string{"purego"}, + pkg: "hash/maphash", + }) } // Test ios/amd64 for the iOS simulator. diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 4ac2086857..36091d5a94 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -217,7 +217,7 @@ var depsRules = ` # hashes io < hash - < hash/adler32, hash/crc32, hash/crc64, hash/fnv, hash/maphash; + < hash/adler32, hash/crc32, hash/crc64, hash/fnv; # math/big FMT, encoding/binary, math/rand @@ -472,6 +472,9 @@ var depsRules = ` crypto/tls < net/smtp; + crypto/rand + < hash/maphash; # for purego implementation + # HTTP, King of Dependencies. FMT diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go index 690068a70a..c2e9e400b9 100644 --- a/src/hash/maphash/maphash.go +++ b/src/hash/maphash/maphash.go @@ -12,10 +12,6 @@ // (See crypto/sha256 and crypto/sha512 for cryptographic use.) package maphash -import ( - "unsafe" -) - // A Seed is a random value that selects the specific hash function // computed by a Hash. If two Hashes use the same Seeds, they // will compute the same hash values for any given input. @@ -44,17 +40,15 @@ func Bytes(seed Seed, b []byte) uint64 { if state == 0 { panic("maphash: use of uninitialized Seed") } - if len(b) == 0 { - return rthash(nil, 0, state) // avoid &b[0] index panic below - } + if len(b) > bufSize { b = b[:len(b):len(b)] // merge len and cap calculations when reslicing for len(b) > bufSize { - state = rthash(&b[0], bufSize, state) + state = rthash(b[:bufSize], state) b = b[bufSize:] } } - return rthash(&b[0], len(b), state) + return rthash(b, state) } // String returns the hash of s with the given seed. @@ -71,12 +65,10 @@ func String(seed Seed, s string) uint64 { panic("maphash: use of uninitialized Seed") } for len(s) > bufSize { - p := (*byte)(unsafe.StringData(s)) - state = rthash(p, bufSize, state) + state = rthashString(s[:bufSize], state) s = s[bufSize:] } - p := (*byte)(unsafe.StringData(s)) - return rthash(p, len(s), state) + return rthashString(s, state) } // A Hash computes a seeded hash of a byte sequence. @@ -162,7 +154,7 @@ func (h *Hash) Write(b []byte) (int, error) { if len(b) > bufSize { h.initSeed() for len(b) > bufSize { - h.state.s = rthash(&b[0], bufSize, h.state.s) + h.state.s = rthash(b[:bufSize], h.state.s) b = b[bufSize:] } } @@ -189,8 +181,7 @@ func (h *Hash) WriteString(s string) (int, error) { if len(s) > bufSize { h.initSeed() for len(s) > bufSize { - ptr := (*byte)(unsafe.StringData(s)) - h.state.s = rthash(ptr, bufSize, h.state.s) + h.state.s = rthashString(s[:bufSize], h.state.s) s = s[bufSize:] } } @@ -233,7 +224,7 @@ func (h *Hash) flush() { panic("maphash: flush of partially full buffer") } h.initSeed() - h.state.s = rthash(&h.buf[0], h.n, h.state.s) + h.state.s = rthash(h.buf[:h.n], h.state.s) h.n = 0 } @@ -246,14 +237,14 @@ func (h *Hash) flush() { // by using bit masking, shifting, or modular arithmetic. func (h *Hash) Sum64() uint64 { h.initSeed() - return rthash(&h.buf[0], h.n, h.state.s) + return rthash(h.buf[:h.n], h.state.s) } // MakeSeed returns a new random seed. func MakeSeed() Seed { var s uint64 for { - s = runtime_fastrand64() + s = randUint64() // We use seed 0 to indicate an uninitialized seed/hash, // so keep trying until we get a non-zero seed. if s != 0 { @@ -263,28 +254,6 @@ func MakeSeed() Seed { return Seed{s: s} } -//go:linkname runtime_fastrand64 runtime.fastrand64 -func runtime_fastrand64() uint64 - -func rthash(ptr *byte, len int, seed uint64) uint64 { - if len == 0 { - return seed - } - // The runtime hasher only works on uintptr. For 64-bit - // architectures, we use the hasher directly. Otherwise, - // we use two parallel hashers on the lower and upper 32 bits. - if unsafe.Sizeof(uintptr(0)) == 8 { - return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))) - } - lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)) - hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len)) - return uint64(hi)<<32 | uint64(lo) -} - -//go:linkname runtime_memhash runtime.memhash -//go:noescape -func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr - // Sum appends the hash's current 64-bit value to b. // It exists for implementing hash.Hash. // For direct calls, it is more efficient to use Sum64. diff --git a/src/hash/maphash/maphash_purego.go b/src/hash/maphash/maphash_purego.go new file mode 100644 index 0000000000..d49a44ae64 --- /dev/null +++ b/src/hash/maphash/maphash_purego.go @@ -0,0 +1,104 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build purego + +package maphash + +import ( + "crypto/rand" + "math/bits" +) + +func rthash(buf []byte, seed uint64) uint64 { + if len(buf) == 0 { + return seed + } + return wyhash(buf, seed, uint64(len(buf))) +} + +func rthashString(s string, state uint64) uint64 { + return rthash([]byte(s), state) +} + +func randUint64() uint64 { + buf := make([]byte, 8) + _, _ = rand.Read(buf) + return leUint64(buf) +} + +// This is a port of wyhash implementation in runtime/hash64.go, +// without using unsafe for purego. + +const ( + m1 = 0xa0761d6478bd642f + m2 = 0xe7037ed1a0b428db + m3 = 0x8ebc6af09c88c6e3 + m4 = 0x589965cc75374cc3 + m5 = 0x1d8e4e27c47d124f +) + +func wyhash(key []byte, seed, len uint64) uint64 { + p := key + i := len + var a, b uint64 + seed ^= m1 + + if i > 16 { + if i > 48 { + seed1 := seed + seed2 := seed + for ; i > 48; i -= 48 { + seed = mix(r8(p)^m2, r8(p[8:])^seed) + seed1 = mix(r8(p[16:])^m3, r8(p[24:])^seed1) + seed2 = mix(r8(p[32:])^m4, r8(p[40:])^seed2) + p = p[48:] + } + seed ^= seed1 ^ seed2 + } + for ; i > 16; i -= 16 { + seed = mix(r8(p)^m2, r8(p[8:])^seed) + p = p[16:] + } + } + switch { + case i == 0: + return seed + case i < 4: + a = r3(p, i) + default: + n := (i >> 3) << 2 + a = r4(p)<<32 | r4(p[n:]) + b = r4(p[i-4:])<<32 | r4(p[i-4-n:]) + } + return mix(m5^len, mix(a^m2, b^seed)) +} + +func r3(p []byte, k uint64) uint64 { + return (uint64(p[0]) << 16) | (uint64(p[k>>1]) << 8) | uint64(p[k-1]) +} + +func r4(p []byte) uint64 { + return uint64(leUint32(p)) +} + +func r8(p []byte) uint64 { + return leUint64(p) +} + +func mix(a, b uint64) uint64 { + hi, lo := bits.Mul64(a, b) + return hi ^ lo +} + +func leUint32(b []byte) uint32 { + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func leUint64(b []byte) uint64 { + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} diff --git a/src/hash/maphash/maphash_runtime.go b/src/hash/maphash/maphash_runtime.go new file mode 100644 index 0000000000..98097ff9c3 --- /dev/null +++ b/src/hash/maphash/maphash_runtime.go @@ -0,0 +1,43 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !purego + +package maphash + +import ( + "unsafe" +) + +//go:linkname runtime_fastrand64 runtime.fastrand64 +func runtime_fastrand64() uint64 + +//go:linkname runtime_memhash runtime.memhash +//go:noescape +func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr + +func rthash(buf []byte, seed uint64) uint64 { + if len(buf) == 0 { + return seed + } + len := len(buf) + // The runtime hasher only works on uintptr. For 64-bit + // architectures, we use the hasher directly. Otherwise, + // we use two parallel hashers on the lower and upper 32 bits. + if unsafe.Sizeof(uintptr(0)) == 8 { + return uint64(runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len))) + } + lo := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len)) + hi := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed>>32), uintptr(len)) + return uint64(hi)<<32 | uint64(lo) +} + +func rthashString(s string, state uint64) uint64 { + buf := unsafe.Slice(unsafe.StringData(s), len(s)) + return rthash(buf, state) +} + +func randUint64() uint64 { + return runtime_fastrand64() +}