From 03103a54d830ee14187aac7720e42000927a6ce9 Mon Sep 17 00:00:00 2001 From: qiulaidongfeng <2645477756@qq.com> Date: Wed, 25 Sep 2024 10:32:49 +0000 Subject: [PATCH] hash/maphash: add WriteComparable and Comparable Default, use hash function in the runtime package. If the build tag is purego or raw memory cannot be hash directly, use reflect get each field to hash separately. Fixes #54670 Change-Id: Ic968864c9c3c51883967d4f6dc24432385c7dc79 GitHub-Last-Rev: 5ae8a28834c8b809a52c74617e2a8530acec8095 GitHub-Pull-Request: golang/go#69166 Reviewed-on: https://go-review.googlesource.com/c/go/+/609761 Reviewed-by: Keith Randall Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- api/next/54670.txt | 2 + .../6-stdlib/99-minor/hash/maphash/54670.md | 2 + src/hash/maphash/maphash.go | 123 ++++++++++- src/hash/maphash/maphash_purego.go | 6 + src/hash/maphash/maphash_runtime.go | 18 ++ src/hash/maphash/maphash_test.go | 196 ++++++++++++++++++ 6 files changed, 346 insertions(+), 1 deletion(-) create mode 100644 api/next/54670.txt create mode 100644 doc/next/6-stdlib/99-minor/hash/maphash/54670.md diff --git a/api/next/54670.txt b/api/next/54670.txt new file mode 100644 index 0000000000..d639a68d93 --- /dev/null +++ b/api/next/54670.txt @@ -0,0 +1,2 @@ +pkg hash/maphash, func Comparable[$0 comparable](Seed, $0) uint64 #54670 +pkg hash/maphash, func WriteComparable[$0 comparable](*Hash, $0) #54670 diff --git a/doc/next/6-stdlib/99-minor/hash/maphash/54670.md b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md new file mode 100644 index 0000000000..ed67a4cb1f --- /dev/null +++ b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md @@ -0,0 +1,2 @@ +New function [Comparable] returns the hash of comparable value v. +New function [WriteComparable] adds x to the data hashed by [Hash]. diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go index 1e70a279f5..02475d5583 100644 --- a/src/hash/maphash/maphash.go +++ b/src/hash/maphash/maphash.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package maphash provides hash functions on byte sequences. +// Package maphash provides hash functions on byte sequences and comparable values. // These hash functions are intended to be used to implement hash tables or // other data structures that need to map arbitrary strings or byte // sequences to a uniform distribution on unsigned 64-bit integers. @@ -12,6 +12,13 @@ // (See crypto/sha256 and crypto/sha512 for cryptographic use.) package maphash +import ( + "internal/abi" + "internal/byteorder" + "math" + "reflect" +) + // A Seed is a random value that selects the specific hash function // computed by a [Hash]. If two Hashes use the same Seeds, they // will compute the same hash values for any given input. @@ -275,3 +282,117 @@ func (h *Hash) Size() int { return 8 } // BlockSize returns h's block size. func (h *Hash) BlockSize() int { return len(h.buf) } + +// Comparable returns the hash of comparable value v with the given seed +// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2. +// If v != v, then the resulting hash is randomly distributed. +func Comparable[T comparable](seed Seed, v T) uint64 { + comparableReady(v) + var h Hash + h.SetSeed(seed) + comparableF(&h, v) + return h.Sum64() +} + +func comparableReady[T comparable](v T) { + // Force v to be on the heap. + // We cannot hash pointers to local variables, + // as the address of the local variable + // might change on stack growth. + abi.Escape(v) +} + +// WriteComparable adds x to the data hashed by h. +func WriteComparable[T comparable](h *Hash, x T) { + comparableReady(x) + comparableF(h, x) +} + +// appendT hash a value, +// when the value cannot be directly hash raw memory, +// or when purego is used. +func appendT(h *Hash, v reflect.Value) { + h.WriteString(v.Type().String()) + switch v.Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: + var buf [8]byte + byteorder.LePutUint64(buf[:], uint64(v.Int())) + h.Write(buf[:]) + return + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr: + var buf [8]byte + byteorder.LePutUint64(buf[:], v.Uint()) + h.Write(buf[:]) + return + case reflect.Array: + var buf [8]byte + for i := range uint64(v.Len()) { + byteorder.LePutUint64(buf[:], i) + // do not want to hash to the same value, + // [2]string{"foo", ""} and [2]string{"", "foo"}. + h.Write(buf[:]) + appendT(h, v.Index(int(i))) + } + return + case reflect.String: + h.WriteString(v.String()) + return + case reflect.Struct: + var buf [8]byte + for i := range v.NumField() { + f := v.Field(i) + byteorder.LePutUint64(buf[:], uint64(i)) + // do not want to hash to the same value, + // struct{a,b string}{"foo",""} and + // struct{a,b string}{"","foo"}. + h.Write(buf[:]) + appendT(h, f) + } + return + case reflect.Complex64, reflect.Complex128: + c := v.Complex() + h.float64(real(c)) + h.float64(imag(c)) + return + case reflect.Float32, reflect.Float64: + h.float64(v.Float()) + return + case reflect.Bool: + h.WriteByte(btoi(v.Bool())) + return + case reflect.UnsafePointer, reflect.Pointer: + var buf [8]byte + // because pointing to the abi.Escape call in comparableReady, + // So this is ok to hash pointer, + // this way because we know their target won't be moved. + byteorder.LePutUint64(buf[:], uint64(v.Pointer())) + h.Write(buf[:]) + return + case reflect.Interface: + appendT(h, v.Elem()) + return + } + panic("maphash: " + v.Type().String() + " not comparable") +} + +func (h *Hash) float64(f float64) { + if f == 0 { + h.WriteByte(0) + return + } + var buf [8]byte + if f != f { + byteorder.LePutUint64(buf[:], randUint64()) + h.Write(buf[:]) + return + } + byteorder.LePutUint64(buf[:], math.Float64bits(f)) + h.Write(buf[:]) +} + +func btoi(b bool) byte { + if b { + return 1 + } + return 0 +} diff --git a/src/hash/maphash/maphash_purego.go b/src/hash/maphash/maphash_purego.go index 38ac8c4df3..be7ac52f23 100644 --- a/src/hash/maphash/maphash_purego.go +++ b/src/hash/maphash/maphash_purego.go @@ -10,6 +10,7 @@ import ( "crypto/rand" "internal/byteorder" "math/bits" + "reflect" ) func rthash(buf []byte, seed uint64) uint64 { @@ -92,3 +93,8 @@ func mix(a, b uint64) uint64 { hi, lo := bits.Mul64(a, b) return hi ^ lo } + +func comparableF[T comparable](h *Hash, v T) { + vv := reflect.ValueOf(v) + appendT(h, vv) +} diff --git a/src/hash/maphash/maphash_runtime.go b/src/hash/maphash/maphash_runtime.go index b831df2cf4..1570e7dea4 100644 --- a/src/hash/maphash/maphash_runtime.go +++ b/src/hash/maphash/maphash_runtime.go @@ -7,6 +7,8 @@ package maphash import ( + "internal/abi" + "reflect" "unsafe" ) @@ -41,3 +43,19 @@ func rthashString(s string, state uint64) uint64 { func randUint64() uint64 { return runtime_rand() } + +func comparableF[T comparable](h *Hash, v T) { + t := abi.TypeFor[T]() + // We can only use the raw memory contents for the hash, + // if the raw memory contents are used for computing equality. + // That works for some types (int), + // but not others (float, string, structs with padding, etc.) + if t.TFlag&abi.TFlagRegularMemory != 0 { + ptr := unsafe.Pointer(&v) + l := t.Size() + h.Write(unsafe.Slice((*byte)(ptr), l)) + return + } + vv := reflect.ValueOf(v) + appendT(h, vv) +} diff --git a/src/hash/maphash/maphash_test.go b/src/hash/maphash/maphash_test.go index ed70f0ce51..f1b292e101 100644 --- a/src/hash/maphash/maphash_test.go +++ b/src/hash/maphash/maphash_test.go @@ -8,7 +8,10 @@ import ( "bytes" "fmt" "hash" + "math" + "reflect" "testing" + "unsafe" ) func TestUnseededHash(t *testing.T) { @@ -210,6 +213,199 @@ func TestSeedFromReset(t *testing.T) { } } +func negativeZero[T float32 | float64]() T { + var f T + f = -f + return f +} + +func TestComparable(t *testing.T) { + testComparable(t, int64(2)) + testComparable(t, uint64(8)) + testComparable(t, uintptr(12)) + testComparable(t, any("s")) + testComparable(t, "s") + testComparable(t, true) + testComparable(t, new(float64)) + testComparable(t, float64(9)) + testComparable(t, complex128(9i+1)) + testComparable(t, struct{}{}) + testComparable(t, struct { + i int + u uint + b bool + f float64 + p *int + a any + }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1}) + type S struct { + s string + } + s1 := S{s: heapStr(t)} + s2 := S{s: heapStr(t)} + if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) { + t.Fatalf("unexpected two heapStr ptr equal") + } + if s1.s != s2.s { + t.Fatalf("unexpected two heapStr value not equal") + } + testComparable(t, s1, s2) + testComparable(t, s1.s, s2.s) + testComparable(t, float32(0), negativeZero[float32]()) + testComparable(t, float64(0), negativeZero[float64]()) + testComparableNoEqual(t, math.NaN(), math.NaN()) + testComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"}) + testComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"}) + testComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)}) +} + +func testComparableNoEqual[T comparable](t *testing.T, v1, v2 T) { + seed := MakeSeed() + if Comparable(seed, v1) == Comparable(seed, v2) { + t.Fatalf("Comparable(seed, %v) == Comparable(seed, %v)", v1, v2) + } +} + +var heapStrValue = []byte("aTestString") + +func heapStr(t *testing.T) string { + return string(heapStrValue) +} + +func testComparable[T comparable](t *testing.T, v T, v2 ...T) { + t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) { + var a, b T = v, v + if len(v2) != 0 { + b = v2[0] + } + var pa *T = &a + seed := MakeSeed() + if Comparable(seed, a) != Comparable(seed, b) { + t.Fatalf("Comparable(seed, %v) != Comparable(seed, %v)", a, b) + } + old := Comparable(seed, pa) + stackGrow(8192) + new := Comparable(seed, pa) + if old != new { + t.Fatal("Comparable(seed, ptr) != Comparable(seed, ptr)") + } + }) +} + +var use byte + +//go:noinline +func stackGrow(dep int) { + if dep == 0 { + return + } + var local [1024]byte + // make sure local is allocated on the stack. + local[randUint64()%1024] = byte(randUint64()) + use = local[randUint64()%1024] + stackGrow(dep - 1) +} + +func TestWriteComparable(t *testing.T) { + testWriteComparable(t, int64(2)) + testWriteComparable(t, uint64(8)) + testWriteComparable(t, uintptr(12)) + testWriteComparable(t, any("s")) + testWriteComparable(t, "s") + testComparable(t, true) + testWriteComparable(t, new(float64)) + testWriteComparable(t, float64(9)) + testWriteComparable(t, complex128(9i+1)) + testWriteComparable(t, struct{}{}) + testWriteComparable(t, struct { + i int + u uint + b bool + f float64 + p *int + a any + }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1}) + type S struct { + s string + } + s1 := S{s: heapStr(t)} + s2 := S{s: heapStr(t)} + if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) { + t.Fatalf("unexpected two heapStr ptr equal") + } + if s1.s != s2.s { + t.Fatalf("unexpected two heapStr value not equal") + } + testWriteComparable(t, s1, s2) + testWriteComparable(t, s1.s, s2.s) + testWriteComparable(t, float32(0), negativeZero[float32]()) + testWriteComparable(t, float64(0), negativeZero[float64]()) + testWriteComparableNoEqual(t, math.NaN(), math.NaN()) + testWriteComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"}) + testWriteComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"}) + testWriteComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)}) +} + +func testWriteComparableNoEqual[T comparable](t *testing.T, v1, v2 T) { + seed := MakeSeed() + h1 := Hash{} + h2 := Hash{} + h1.seed, h2.seed = seed, seed + WriteComparable(&h1, v1) + WriteComparable(&h2, v2) + if h1.Sum64() == h2.Sum64() { + t.Fatalf("WriteComparable(seed, %v) == WriteComparable(seed, %v)", v1, v2) + } + +} + +func testWriteComparable[T comparable](t *testing.T, v T, v2 ...T) { + t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) { + var a, b T = v, v + if len(v2) != 0 { + b = v2[0] + } + var pa *T = &a + h1 := Hash{} + h2 := Hash{} + h1.seed = MakeSeed() + h2.seed = h1.seed + WriteComparable(&h1, a) + WriteComparable(&h2, b) + if h1.Sum64() != h2.Sum64() { + t.Fatalf("WriteComparable(h, %v) != WriteComparable(h, %v)", a, b) + } + WriteComparable(&h1, pa) + old := h1.Sum64() + stackGrow(8192) + WriteComparable(&h2, pa) + new := h2.Sum64() + if old != new { + t.Fatal("WriteComparable(seed, ptr) != WriteComparable(seed, ptr)") + } + }) +} + +func TestComparableShouldPanic(t *testing.T) { + s := []byte("s") + a := any(s) + defer func() { + err := recover() + if err == nil { + t.Fatalf("hash any([]byte) should panic in maphash.appendT") + } + s, ok := err.(string) + if !ok { + t.Fatalf("hash any([]byte) should panic in maphash.appendT") + } + want := "maphash: []uint8 not comparable" + if s != want { + t.Fatalf("want %s, got %s", want, s) + } + }() + Comparable(MakeSeed(), a) +} + // Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces. var _ hash.Hash = &Hash{} var _ hash.Hash64 = &Hash{} -- 2.48.1