hash/maphash: add WriteComparable and Comparable

author qiulaidongfeng <2645477756@qq.com>

Wed, 25 Sep 2024 10:32:49 +0000 (10:32 +0000)

committer Keith Randall <khr@golang.org>

Wed, 2 Oct 2024 15:54:11 +0000 (15:54 +0000)
author qiulaidongfeng <2645477756@qq.com>
Wed, 25 Sep 2024 10:32:49 +0000 (10:32 +0000)
committer Keith Randall <khr@golang.org>
Wed, 2 Oct 2024 15:54:11 +0000 (15:54 +0000)
diff --git a/api/next/54670.txt b/api/next/54670.txt

new file mode 100644 (file)

index 0000000..d639a68
--- /dev/null
+++ b/api/next/54670.txt
@@ -0,0 +1,2 @@
+pkg hash/maphash, func Comparable[$0 comparable](Seed, $0) uint64 #54670
+pkg hash/maphash, func WriteComparable[$0 comparable](*Hash, $0) #54670
diff --git a/doc/next/6-stdlib/99-minor/hash/maphash/54670.md b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md

new file mode 100644 (file)

index 0000000..ed67a4c
--- /dev/null
+++ b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md
@@ -0,0 +1,2 @@
+New function [Comparable] returns the hash of comparable value v.
+New function [WriteComparable] adds x to the data hashed by [Hash].
diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go

index 1e70a279f5f193514d5392a637b91d5126622283..02475d558370d77b186b46b0550f8559ee3ebcfd 100644 (file)
--- a/src/hash/maphash/maphash.go
+++ b/src/hash/maphash/maphash.go
@@ -2,7 +2,7 @@
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
-// Package maphash provides hash functions on byte sequences.
+// Package maphash provides hash functions on byte sequences and comparable values.
  // These hash functions are intended to be used to implement hash tables or
  // other data structures that need to map arbitrary strings or byte
  // sequences to a uniform distribution on unsigned 64-bit integers.
@@ -12,6 +12,13 @@
  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
  package maphash
  
+import (
+       "internal/abi"
+       "internal/byteorder"
+       "math"
+       "reflect"
+)
+
  // A Seed is a random value that selects the specific hash function
  // computed by a [Hash]. If two Hashes use the same Seeds, they
  // will compute the same hash values for any given input.
@@ -275,3 +282,117 @@ func (h *Hash) Size() int { return 8 }
  
  // BlockSize returns h's block size.
  func (h *Hash) BlockSize() int { return len(h.buf) }
+
+// Comparable returns the hash of comparable value v with the given seed
+// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
+// If v != v, then the resulting hash is randomly distributed.
+func Comparable[T comparable](seed Seed, v T) uint64 {
+       comparableReady(v)
+       var h Hash
+       h.SetSeed(seed)
+       comparableF(&h, v)
+       return h.Sum64()
+}
+
+func comparableReady[T comparable](v T) {
+       // Force v to be on the heap.
+       // We cannot hash pointers to local variables,
+       // as the address of the local variable
+       // might change on stack growth.
+       abi.Escape(v)
+}
+
+// WriteComparable adds x to the data hashed by h.
+func WriteComparable[T comparable](h *Hash, x T) {
+       comparableReady(x)
+       comparableF(h, x)
+}
+
+// appendT hash a value,
+// when the value cannot be directly hash raw memory,
+// or when purego is used.
+func appendT(h *Hash, v reflect.Value) {
+       h.WriteString(v.Type().String())
+       switch v.Kind() {
+       case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
+               var buf [8]byte
+               byteorder.LePutUint64(buf[:], uint64(v.Int()))
+               h.Write(buf[:])
+               return
+       case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
+               var buf [8]byte
+               byteorder.LePutUint64(buf[:], v.Uint())
+               h.Write(buf[:])
+               return
+       case reflect.Array:
+               var buf [8]byte
+               for i := range uint64(v.Len()) {
+                       byteorder.LePutUint64(buf[:], i)
+                       // do not want to hash to the same value,
+                       // [2]string{"foo", ""} and [2]string{"", "foo"}.
+                       h.Write(buf[:])
+                       appendT(h, v.Index(int(i)))
+               }
+               return
+       case reflect.String:
+               h.WriteString(v.String())
+               return
+       case reflect.Struct:
+               var buf [8]byte
+               for i := range v.NumField() {
+                       f := v.Field(i)
+                       byteorder.LePutUint64(buf[:], uint64(i))
+                       // do not want to hash to the same value,
+                       // struct{a,b string}{"foo",""} and
+                       // struct{a,b string}{"","foo"}.
+                       h.Write(buf[:])
+                       appendT(h, f)
+               }
+               return
+       case reflect.Complex64, reflect.Complex128:
+               c := v.Complex()
+               h.float64(real(c))
+               h.float64(imag(c))
+               return
+       case reflect.Float32, reflect.Float64:
+               h.float64(v.Float())
+               return
+       case reflect.Bool:
+               h.WriteByte(btoi(v.Bool()))
+               return
+       case reflect.UnsafePointer, reflect.Pointer:
+               var buf [8]byte
+               // because pointing to the abi.Escape call in comparableReady,
+               // So this is ok to hash pointer,
+               // this way because we know their target won't be moved.
+               byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
+               h.Write(buf[:])
+               return
+       case reflect.Interface:
+               appendT(h, v.Elem())
+               return
+       }
+       panic("maphash: " + v.Type().String() + " not comparable")
+}
+
+func (h *Hash) float64(f float64) {
+       if f == 0 {
+               h.WriteByte(0)
+               return
+       }
+       var buf [8]byte
+       if f != f {
+               byteorder.LePutUint64(buf[:], randUint64())
+               h.Write(buf[:])
+               return
+       }
+       byteorder.LePutUint64(buf[:], math.Float64bits(f))
+       h.Write(buf[:])
+}
+
+func btoi(b bool) byte {
+       if b {
+               return 1
+       }
+       return 0
+}
diff --git a/src/hash/maphash/maphash_purego.go b/src/hash/maphash/maphash_purego.go

index 38ac8c4df3c645b0e9fdd9d601f74659c49381b9..be7ac52f2335c386b51f86f4eb2c7f2f26f76692 100644 (file)
--- a/src/hash/maphash/maphash_purego.go
+++ b/src/hash/maphash/maphash_purego.go
@@ -10,6 +10,7 @@ import (
         "crypto/rand"
         "internal/byteorder"
         "math/bits"
+       "reflect"
  )
  
  func rthash(buf []byte, seed uint64) uint64 {
@@ -92,3 +93,8 @@ func mix(a, b uint64) uint64 {
         hi, lo := bits.Mul64(a, b)
         return hi ^ lo
  }
+
+func comparableF[T comparable](h *Hash, v T) {
+       vv := reflect.ValueOf(v)
+       appendT(h, vv)
+}
diff --git a/src/hash/maphash/maphash_runtime.go b/src/hash/maphash/maphash_runtime.go

index b831df2cf4a9c4fdc4389f6d1e67c94358733df8..1570e7dea49601f4d6d28491b0062232150d67ec 100644 (file)
--- a/src/hash/maphash/maphash_runtime.go
+++ b/src/hash/maphash/maphash_runtime.go
@@ -7,6 +7,8 @@
  package maphash
  
  import (
+       "internal/abi"
+       "reflect"
         "unsafe"
  )
  
@@ -41,3 +43,19 @@ func rthashString(s string, state uint64) uint64 {
  func randUint64() uint64 {
         return runtime_rand()
  }
+
+func comparableF[T comparable](h *Hash, v T) {
+       t := abi.TypeFor[T]()
+       // We can only use the raw memory contents for the hash,
+       // if the raw memory contents are used for computing equality.
+       // That works for some types (int),
+       // but not others (float, string, structs with padding, etc.)
+       if t.TFlag&abi.TFlagRegularMemory != 0 {
+               ptr := unsafe.Pointer(&v)
+               l := t.Size()
+               h.Write(unsafe.Slice((*byte)(ptr), l))
+               return
+       }
+       vv := reflect.ValueOf(v)
+       appendT(h, vv)
+}
diff --git a/src/hash/maphash/maphash_test.go b/src/hash/maphash/maphash_test.go

index ed70f0ce51c52f30f975a76f8b2593adffb7d357..f1b292e1011a999e2a5aa93c73c8b4f97be991d7 100644 (file)
--- a/src/hash/maphash/maphash_test.go
+++ b/src/hash/maphash/maphash_test.go
@@ -8,7 +8,10 @@ import (
         "bytes"
         "fmt"
         "hash"
+       "math"
+       "reflect"
         "testing"
+       "unsafe"
  )
  
  func TestUnseededHash(t *testing.T) {
@@ -210,6 +213,199 @@ func TestSeedFromReset(t *testing.T) {
         }
  }
  
+func negativeZero[T float32 | float64]() T {
+       var f T
+       f = -f
+       return f
+}
+
+func TestComparable(t *testing.T) {
+       testComparable(t, int64(2))
+       testComparable(t, uint64(8))
+       testComparable(t, uintptr(12))
+       testComparable(t, any("s"))
+       testComparable(t, "s")
+       testComparable(t, true)
+       testComparable(t, new(float64))
+       testComparable(t, float64(9))
+       testComparable(t, complex128(9i+1))
+       testComparable(t, struct{}{})
+       testComparable(t, struct {
+               i int
+               u uint
+               b bool
+               f float64
+               p *int
+               a any
+       }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+       type S struct {
+               s string
+       }
+       s1 := S{s: heapStr(t)}
+       s2 := S{s: heapStr(t)}
+       if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+               t.Fatalf("unexpected two heapStr ptr equal")
+       }
+       if s1.s != s2.s {
+               t.Fatalf("unexpected two heapStr value not equal")
+       }
+       testComparable(t, s1, s2)
+       testComparable(t, s1.s, s2.s)
+       testComparable(t, float32(0), negativeZero[float32]())
+       testComparable(t, float64(0), negativeZero[float64]())
+       testComparableNoEqual(t, math.NaN(), math.NaN())
+       testComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+       testComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+       testComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+       seed := MakeSeed()
+       if Comparable(seed, v1) == Comparable(seed, v2) {
+               t.Fatalf("Comparable(seed, %v) == Comparable(seed, %v)", v1, v2)
+       }
+}
+
+var heapStrValue = []byte("aTestString")
+
+func heapStr(t *testing.T) string {
+       return string(heapStrValue)
+}
+
+func testComparable[T comparable](t *testing.T, v T, v2 ...T) {
+       t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+               var a, b T = v, v
+               if len(v2) != 0 {
+                       b = v2[0]
+               }
+               var pa *T = &a
+               seed := MakeSeed()
+               if Comparable(seed, a) != Comparable(seed, b) {
+                       t.Fatalf("Comparable(seed, %v) != Comparable(seed, %v)", a, b)
+               }
+               old := Comparable(seed, pa)
+               stackGrow(8192)
+               new := Comparable(seed, pa)
+               if old != new {
+                       t.Fatal("Comparable(seed, ptr) != Comparable(seed, ptr)")
+               }
+       })
+}
+
+var use byte
+
+//go:noinline
+func stackGrow(dep int) {
+       if dep == 0 {
+               return
+       }
+       var local [1024]byte
+       // make sure local is allocated on the stack.
+       local[randUint64()%1024] = byte(randUint64())
+       use = local[randUint64()%1024]
+       stackGrow(dep - 1)
+}
+
+func TestWriteComparable(t *testing.T) {
+       testWriteComparable(t, int64(2))
+       testWriteComparable(t, uint64(8))
+       testWriteComparable(t, uintptr(12))
+       testWriteComparable(t, any("s"))
+       testWriteComparable(t, "s")
+       testComparable(t, true)
+       testWriteComparable(t, new(float64))
+       testWriteComparable(t, float64(9))
+       testWriteComparable(t, complex128(9i+1))
+       testWriteComparable(t, struct{}{})
+       testWriteComparable(t, struct {
+               i int
+               u uint
+               b bool
+               f float64
+               p *int
+               a any
+       }{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+       type S struct {
+               s string
+       }
+       s1 := S{s: heapStr(t)}
+       s2 := S{s: heapStr(t)}
+       if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+               t.Fatalf("unexpected two heapStr ptr equal")
+       }
+       if s1.s != s2.s {
+               t.Fatalf("unexpected two heapStr value not equal")
+       }
+       testWriteComparable(t, s1, s2)
+       testWriteComparable(t, s1.s, s2.s)
+       testWriteComparable(t, float32(0), negativeZero[float32]())
+       testWriteComparable(t, float64(0), negativeZero[float64]())
+       testWriteComparableNoEqual(t, math.NaN(), math.NaN())
+       testWriteComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+       testWriteComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+       testWriteComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testWriteComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+       seed := MakeSeed()
+       h1 := Hash{}
+       h2 := Hash{}
+       h1.seed, h2.seed = seed, seed
+       WriteComparable(&h1, v1)
+       WriteComparable(&h2, v2)
+       if h1.Sum64() == h2.Sum64() {
+               t.Fatalf("WriteComparable(seed, %v) == WriteComparable(seed, %v)", v1, v2)
+       }
+
+}
+
+func testWriteComparable[T comparable](t *testing.T, v T, v2 ...T) {
+       t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+               var a, b T = v, v
+               if len(v2) != 0 {
+                       b = v2[0]
+               }
+               var pa *T = &a
+               h1 := Hash{}
+               h2 := Hash{}
+               h1.seed = MakeSeed()
+               h2.seed = h1.seed
+               WriteComparable(&h1, a)
+               WriteComparable(&h2, b)
+               if h1.Sum64() != h2.Sum64() {
+                       t.Fatalf("WriteComparable(h, %v) != WriteComparable(h, %v)", a, b)
+               }
+               WriteComparable(&h1, pa)
+               old := h1.Sum64()
+               stackGrow(8192)
+               WriteComparable(&h2, pa)
+               new := h2.Sum64()
+               if old != new {
+                       t.Fatal("WriteComparable(seed, ptr) != WriteComparable(seed, ptr)")
+               }
+       })
+}
+
+func TestComparableShouldPanic(t *testing.T) {
+       s := []byte("s")
+       a := any(s)
+       defer func() {
+               err := recover()
+               if err == nil {
+                       t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+               }
+               s, ok := err.(string)
+               if !ok {
+                       t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+               }
+               want := "maphash: []uint8 not comparable"
+               if s != want {
+                       t.Fatalf("want %s, got %s", want, s)
+               }
+       }()
+       Comparable(MakeSeed(), a)
+}
+
  // Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
  var _ hash.Hash = &Hash{}
  var _ hash.Hash64 = &Hash{}
author	qiulaidongfeng <2645477756@qq.com>
	Wed, 25 Sep 2024 10:32:49 +0000 (10:32 +0000)
committer	Keith Randall <khr@golang.org>
	Wed, 2 Oct 2024 15:54:11 +0000 (15:54 +0000)
api/next/54670.txt	[new file with mode: 0644]	patch \| blob
doc/next/6-stdlib/99-minor/hash/maphash/54670.md	[new file with mode: 0644]	patch \| blob
src/hash/maphash/maphash.go		patch \| blob \| history
src/hash/maphash/maphash_purego.go		patch \| blob \| history
src/hash/maphash/maphash_runtime.go		patch \| blob \| history
src/hash/maphash/maphash_test.go		patch \| blob \| history