]> Cypherpunks repositories - gostls13.git/commitdiff
hash/maphash: add WriteComparable and Comparable
authorqiulaidongfeng <2645477756@qq.com>
Wed, 25 Sep 2024 10:32:49 +0000 (10:32 +0000)
committerKeith Randall <khr@golang.org>
Wed, 2 Oct 2024 15:54:11 +0000 (15:54 +0000)
Default, use  hash function in the runtime package.
If the build tag is purego or raw memory cannot be hash directly,
use reflect get each field to hash separately.

Fixes #54670

Change-Id: Ic968864c9c3c51883967d4f6dc24432385c7dc79
GitHub-Last-Rev: 5ae8a28834c8b809a52c74617e2a8530acec8095
GitHub-Pull-Request: golang/go#69166
Reviewed-on: https://go-review.googlesource.com/c/go/+/609761
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
api/next/54670.txt [new file with mode: 0644]
doc/next/6-stdlib/99-minor/hash/maphash/54670.md [new file with mode: 0644]
src/hash/maphash/maphash.go
src/hash/maphash/maphash_purego.go
src/hash/maphash/maphash_runtime.go
src/hash/maphash/maphash_test.go

diff --git a/api/next/54670.txt b/api/next/54670.txt
new file mode 100644 (file)
index 0000000..d639a68
--- /dev/null
@@ -0,0 +1,2 @@
+pkg hash/maphash, func Comparable[$0 comparable](Seed, $0) uint64 #54670
+pkg hash/maphash, func WriteComparable[$0 comparable](*Hash, $0) #54670
diff --git a/doc/next/6-stdlib/99-minor/hash/maphash/54670.md b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md
new file mode 100644 (file)
index 0000000..ed67a4c
--- /dev/null
@@ -0,0 +1,2 @@
+New function [Comparable] returns the hash of comparable value v.
+New function [WriteComparable] adds x to the data hashed by [Hash].
index 1e70a279f5f193514d5392a637b91d5126622283..02475d558370d77b186b46b0550f8559ee3ebcfd 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package maphash provides hash functions on byte sequences.
+// Package maphash provides hash functions on byte sequences and comparable values.
 // These hash functions are intended to be used to implement hash tables or
 // other data structures that need to map arbitrary strings or byte
 // sequences to a uniform distribution on unsigned 64-bit integers.
 // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
 package maphash
 
+import (
+       "internal/abi"
+       "internal/byteorder"
+       "math"
+       "reflect"
+)
+
 // A Seed is a random value that selects the specific hash function
 // computed by a [Hash]. If two Hashes use the same Seeds, they
 // will compute the same hash values for any given input.
@@ -275,3 +282,117 @@ func (h *Hash) Size() int { return 8 }
 
 // BlockSize returns h's block size.
 func (h *Hash) BlockSize() int { return len(h.buf) }
+
+// Comparable returns the hash of comparable value v with the given seed
+// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
+// If v != v, then the resulting hash is randomly distributed.
+func Comparable[T comparable](seed Seed, v T) uint64 {
+       comparableReady(v)
+       var h Hash
+       h.SetSeed(seed)
+       comparableF(&h, v)
+       return h.Sum64()
+}
+
+func comparableReady[T comparable](v T) {
+       // Force v to be on the heap.
+       // We cannot hash pointers to local variables,
+       // as the address of the local variable
+       // might change on stack growth.
+       abi.Escape(v)
+}
+
+// WriteComparable adds x to the data hashed by h.
+func WriteComparable[T comparable](h *Hash, x T) {
+       comparableReady(x)
+       comparableF(h, x)
+}
+
+// appendT hash a value,
+// when the value cannot be directly hash raw memory,
+// or when purego is used.
+func appendT(h *Hash, v reflect.Value) {
+       h.WriteString(v.Type().String())
+       switch v.Kind() {
+       case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
+               var buf [8]byte
+               byteorder.LePutUint64(buf[:], uint64(v.Int()))
+               h.Write(buf[:])
+               return
+       case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
+               var buf [8]byte
+               byteorder.LePutUint64(buf[:], v.Uint())
+               h.Write(buf[:])
+               return
+       case reflect.Array:
+               var buf [8]byte
+               for i := range uint64(v.Len()) {
+                       byteorder.LePutUint64(buf[:], i)
+                       // do not want to hash to the same value,
+                       // [2]string{"foo", ""} and [2]string{"", "foo"}.
+                       h.Write(buf[:])
+                       appendT(h, v.Index(int(i)))
+               }
+               return
+       case reflect.String:
+               h.WriteString(v.String())
+               return
+       case reflect.Struct:
+               var buf [8]byte
+               for i := range v.NumField() {
+                       f := v.Field(i)
+                       byteorder.LePutUint64(buf[:], uint64(i))
+                       // do not want to hash to the same value,
+                       // struct{a,b string}{"foo",""} and
+                       // struct{a,b string}{"","foo"}.
+                       h.Write(buf[:])
+                       appendT(h, f)
+               }
+               return
+       case reflect.Complex64, reflect.Complex128:
+               c := v.Complex()
+               h.float64(real(c))
+               h.float64(imag(c))
+               return
+       case reflect.Float32, reflect.Float64:
+               h.float64(v.Float())
+               return
+       case reflect.Bool:
+               h.WriteByte(btoi(v.Bool()))
+               return
+       case reflect.UnsafePointer, reflect.Pointer:
+               var buf [8]byte
+               // because pointing to the abi.Escape call in comparableReady,
+               // So this is ok to hash pointer,
+               // this way because we know their target won't be moved.
+               byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
+               h.Write(buf[:])
+               return
+       case reflect.Interface:
+               appendT(h, v.Elem())
+               return
+       }
+       panic("maphash: " + v.Type().String() + " not comparable")
+}
+
+func (h *Hash) float64(f float64) {
+       if f == 0 {
+               h.WriteByte(0)
+               return
+       }
+       var buf [8]byte
+       if f != f {
+               byteorder.LePutUint64(buf[:], randUint64())
+               h.Write(buf[:])
+               return
+       }
+       byteorder.LePutUint64(buf[:], math.Float64bits(f))
+       h.Write(buf[:])
+}
+
+func btoi(b bool) byte {
+       if b {
+               return 1
+       }
+       return 0
+}
index 38ac8c4df3c645b0e9fdd9d601f74659c49381b9..be7ac52f2335c386b51f86f4eb2c7f2f26f76692 100644 (file)
@@ -10,6 +10,7 @@ import (
        "crypto/rand"
        "internal/byteorder"
        "math/bits"
+       "reflect"
 )
 
 func rthash(buf []byte, seed uint64) uint64 {
@@ -92,3 +93,8 @@ func mix(a, b uint64) uint64 {
        hi, lo := bits.Mul64(a, b)
        return hi ^ lo
 }
+
+func comparableF[T comparable](h *Hash, v T) {
+       vv := reflect.ValueOf(v)
+       appendT(h, vv)
+}
index b831df2cf4a9c4fdc4389f6d1e67c94358733df8..1570e7dea49601f4d6d28491b0062232150d67ec 100644 (file)
@@ -7,6 +7,8 @@
 package maphash
 
 import (
+       "internal/abi"
+       "reflect"
        "unsafe"
 )
 
@@ -41,3 +43,19 @@ func rthashString(s string, state uint64) uint64 {
 func randUint64() uint64 {
        return runtime_rand()
 }
+
+func comparableF[T comparable](h *Hash, v T) {
+       t := abi.TypeFor[T]()
+       // We can only use the raw memory contents for the hash,
+       // if the raw memory contents are used for computing equality.
+       // That works for some types (int),
+       // but not others (float, string, structs with padding, etc.)
+       if t.TFlag&abi.TFlagRegularMemory != 0 {
+               ptr := unsafe.Pointer(&v)
+               l := t.Size()
+               h.Write(unsafe.Slice((*byte)(ptr), l))
+               return
+       }
+       vv := reflect.ValueOf(v)
+       appendT(h, vv)
+}
index ed70f0ce51c52f30f975a76f8b2593adffb7d357..f1b292e1011a999e2a5aa93c73c8b4f97be991d7 100644 (file)
@@ -8,7 +8,10 @@ import (
        "bytes"
        "fmt"
        "hash"
+       "math"
+       "reflect"
        "testing"
+       "unsafe"
 )
 
 func TestUnseededHash(t *testing.T) {
@@ -210,6 +213,199 @@ func TestSeedFromReset(t *testing.T) {
        }
 }
 
+func negativeZero[T float32 | float64]() T {
+       var f T
+       f = -f
+       return f
+}
+
+func TestComparable(t *testing.T) {
+       testComparable(t, int64(2))
+       testComparable(t, uint64(8))
+       testComparable(t, uintptr(12))
+       testComparable(t, any("s"))
+       testComparable(t, "s")
+       testComparable(t, true)
+       testComparable(t, new(float64))
+       testComparable(t, float64(9))
+       testComparable(t, complex128(9i+1))
+       testComparable(t, struct{}{})
+       testComparable(t, struct {
+               i int
+               u uint
+               b bool
+               f float64
+               p *int
+               a any
+       }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+       type S struct {
+               s string
+       }
+       s1 := S{s: heapStr(t)}
+       s2 := S{s: heapStr(t)}
+       if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+               t.Fatalf("unexpected two heapStr ptr equal")
+       }
+       if s1.s != s2.s {
+               t.Fatalf("unexpected two heapStr value not equal")
+       }
+       testComparable(t, s1, s2)
+       testComparable(t, s1.s, s2.s)
+       testComparable(t, float32(0), negativeZero[float32]())
+       testComparable(t, float64(0), negativeZero[float64]())
+       testComparableNoEqual(t, math.NaN(), math.NaN())
+       testComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+       testComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+       testComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+       seed := MakeSeed()
+       if Comparable(seed, v1) == Comparable(seed, v2) {
+               t.Fatalf("Comparable(seed, %v) == Comparable(seed, %v)", v1, v2)
+       }
+}
+
+var heapStrValue = []byte("aTestString")
+
+func heapStr(t *testing.T) string {
+       return string(heapStrValue)
+}
+
+func testComparable[T comparable](t *testing.T, v T, v2 ...T) {
+       t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+               var a, b T = v, v
+               if len(v2) != 0 {
+                       b = v2[0]
+               }
+               var pa *T = &a
+               seed := MakeSeed()
+               if Comparable(seed, a) != Comparable(seed, b) {
+                       t.Fatalf("Comparable(seed, %v) != Comparable(seed, %v)", a, b)
+               }
+               old := Comparable(seed, pa)
+               stackGrow(8192)
+               new := Comparable(seed, pa)
+               if old != new {
+                       t.Fatal("Comparable(seed, ptr) != Comparable(seed, ptr)")
+               }
+       })
+}
+
+var use byte
+
+//go:noinline
+func stackGrow(dep int) {
+       if dep == 0 {
+               return
+       }
+       var local [1024]byte
+       // make sure local is allocated on the stack.
+       local[randUint64()%1024] = byte(randUint64())
+       use = local[randUint64()%1024]
+       stackGrow(dep - 1)
+}
+
+func TestWriteComparable(t *testing.T) {
+       testWriteComparable(t, int64(2))
+       testWriteComparable(t, uint64(8))
+       testWriteComparable(t, uintptr(12))
+       testWriteComparable(t, any("s"))
+       testWriteComparable(t, "s")
+       testComparable(t, true)
+       testWriteComparable(t, new(float64))
+       testWriteComparable(t, float64(9))
+       testWriteComparable(t, complex128(9i+1))
+       testWriteComparable(t, struct{}{})
+       testWriteComparable(t, struct {
+               i int
+               u uint
+               b bool
+               f float64
+               p *int
+               a any
+       }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+       type S struct {
+               s string
+       }
+       s1 := S{s: heapStr(t)}
+       s2 := S{s: heapStr(t)}
+       if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+               t.Fatalf("unexpected two heapStr ptr equal")
+       }
+       if s1.s != s2.s {
+               t.Fatalf("unexpected two heapStr value not equal")
+       }
+       testWriteComparable(t, s1, s2)
+       testWriteComparable(t, s1.s, s2.s)
+       testWriteComparable(t, float32(0), negativeZero[float32]())
+       testWriteComparable(t, float64(0), negativeZero[float64]())
+       testWriteComparableNoEqual(t, math.NaN(), math.NaN())
+       testWriteComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+       testWriteComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+       testWriteComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testWriteComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+       seed := MakeSeed()
+       h1 := Hash{}
+       h2 := Hash{}
+       h1.seed, h2.seed = seed, seed
+       WriteComparable(&h1, v1)
+       WriteComparable(&h2, v2)
+       if h1.Sum64() == h2.Sum64() {
+               t.Fatalf("WriteComparable(seed, %v) == WriteComparable(seed, %v)", v1, v2)
+       }
+
+}
+
+func testWriteComparable[T comparable](t *testing.T, v T, v2 ...T) {
+       t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+               var a, b T = v, v
+               if len(v2) != 0 {
+                       b = v2[0]
+               }
+               var pa *T = &a
+               h1 := Hash{}
+               h2 := Hash{}
+               h1.seed = MakeSeed()
+               h2.seed = h1.seed
+               WriteComparable(&h1, a)
+               WriteComparable(&h2, b)
+               if h1.Sum64() != h2.Sum64() {
+                       t.Fatalf("WriteComparable(h, %v) != WriteComparable(h, %v)", a, b)
+               }
+               WriteComparable(&h1, pa)
+               old := h1.Sum64()
+               stackGrow(8192)
+               WriteComparable(&h2, pa)
+               new := h2.Sum64()
+               if old != new {
+                       t.Fatal("WriteComparable(seed, ptr) != WriteComparable(seed, ptr)")
+               }
+       })
+}
+
+func TestComparableShouldPanic(t *testing.T) {
+       s := []byte("s")
+       a := any(s)
+       defer func() {
+               err := recover()
+               if err == nil {
+                       t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+               }
+               s, ok := err.(string)
+               if !ok {
+                       t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+               }
+               want := "maphash: []uint8 not comparable"
+               if s != want {
+                       t.Fatalf("want %s, got %s", want, s)
+               }
+       }()
+       Comparable(MakeSeed(), a)
+}
+
 // Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
 var _ hash.Hash = &Hash{}
 var _ hash.Hash64 = &Hash{}