]> Cypherpunks repositories - gostls13.git/commitdiff
hash/maphash: add Bytes and String
authorRuss Cox <rsc@golang.org>
Mon, 14 Mar 2022 16:31:33 +0000 (12:31 -0400)
committerRuss Cox <rsc@golang.org>
Tue, 5 Apr 2022 18:01:26 +0000 (18:01 +0000)
For very small inputs, h.Reset+h.Write+h.Sum64 is fundamentally
slower than a single operation, by about a factor of two, because
Write must copy the data into h's buffer, just in case there is another
Write before the Sum64.

A single function doing the whole sequence knows there is no extra
write that will happen, so it doesn't need the buffer, so it avoids the copy.

Fixes #42710.

Change-Id: Icc79c68ccb10827f6640071d026df86b4940fcc1
Reviewed-on: https://go-review.googlesource.com/c/go/+/392494
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>

api/next/42710.txt [new file with mode: 0644]
src/hash/maphash/maphash.go
src/hash/maphash/maphash_test.go

diff --git a/api/next/42710.txt b/api/next/42710.txt
new file mode 100644 (file)
index 0000000..7879758
--- /dev/null
@@ -0,0 +1,2 @@
+pkg hash/maphash, func Bytes(Seed, []uint8) uint64 #42710
+pkg hash/maphash, func String(Seed, string) uint64 #42710
index d022d746a74ff64bb882b55f1194409618570057..973fb6870180b50b16b1a8bc8d8eea92738d36e7 100644 (file)
@@ -33,6 +33,54 @@ type Seed struct {
        s uint64
 }
 
+// Bytes returns the hash of b with the given seed.
+//
+// Bytes is equivalent to, but more convenient and efficient than:
+//
+//     var h Hash
+//     h.SetSeed(seed)
+//     h.Write(b)
+//     return h.Sum()
+func Bytes(seed Seed, b []byte) uint64 {
+       state := seed.s
+       if state == 0 {
+               panic("maphash: use of uninitialized Seed")
+       }
+       if len(b) == 0 {
+               return rthash(nil, 0, state) // avoid &b[0] index panic below
+       }
+       if len(b) > bufSize {
+               b = b[:len(b):len(b)] // merge len and cap calculations when reslicing
+               for len(b) > bufSize {
+                       state = rthash(&b[0], bufSize, state)
+                       b = b[bufSize:]
+               }
+       }
+       return rthash(&b[0], len(b), state)
+}
+
+// String returns the hash of s with the given seed.
+//
+// String is equivalent to, but more convenient and efficient than:
+//
+//     var h Hash
+//     h.SetSeed(seed)
+//     h.WriteString(s)
+//     return h.Sum()
+func String(seed Seed, s string) uint64 {
+       state := seed.s
+       if state == 0 {
+               panic("maphash: use of uninitialized Seed")
+       }
+       for len(s) > bufSize {
+               p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
+               state = rthash(p, bufSize, state)
+               s = s[bufSize:]
+       }
+       p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
+       return rthash(p, len(s), state)
+}
+
 // A Hash computes a seeded hash of a byte sequence.
 //
 // The zero Hash is a valid Hash ready to use.
index 78cdfc0e737088843c368b250de79c825af45b1a..75269890731487f19b8b4e6ba40030e9e7686449 100644 (file)
@@ -6,6 +6,7 @@ package maphash
 
 import (
        "bytes"
+       "fmt"
        "hash"
        "testing"
 )
@@ -87,6 +88,14 @@ func TestHashGrouping(t *testing.T) {
                        t.Errorf("hash %d not identical to a single Write", i)
                }
        }
+
+       if sum1 := Bytes(hh[0].Seed(), b); sum1 != hh[0].Sum64() {
+               t.Errorf("hash using Bytes not identical to a single Write")
+       }
+
+       if sum1 := String(hh[0].Seed(), string(b)); sum1 != hh[0].Sum64() {
+               t.Errorf("hash using String not identical to a single Write")
+       }
 }
 
 func TestHashBytesVsString(t *testing.T) {
@@ -208,28 +217,39 @@ var _ hash.Hash64 = &Hash{}
 func benchmarkSize(b *testing.B, size int) {
        h := &Hash{}
        buf := make([]byte, size)
-       b.SetBytes(int64(size))
-       b.ResetTimer()
-
-       for i := 0; i < b.N; i++ {
-               h.Reset()
-               h.Write(buf)
-               h.Sum64()
-       }
-}
-
-func BenchmarkHash8Bytes(b *testing.B) {
-       benchmarkSize(b, 8)
-}
+       s := string(buf)
+
+       b.Run("Write", func(b *testing.B) {
+               b.SetBytes(int64(size))
+               for i := 0; i < b.N; i++ {
+                       h.Reset()
+                       h.Write(buf)
+                       h.Sum64()
+               }
+       })
 
-func BenchmarkHash320Bytes(b *testing.B) {
-       benchmarkSize(b, 320)
-}
+       b.Run("Bytes", func(b *testing.B) {
+               b.SetBytes(int64(size))
+               seed := h.Seed()
+               for i := 0; i < b.N; i++ {
+                       Bytes(seed, buf)
+               }
+       })
 
-func BenchmarkHash1K(b *testing.B) {
-       benchmarkSize(b, 1024)
+       b.Run("String", func(b *testing.B) {
+               b.SetBytes(int64(size))
+               seed := h.Seed()
+               for i := 0; i < b.N; i++ {
+                       String(seed, s)
+               }
+       })
 }
 
-func BenchmarkHash8K(b *testing.B) {
-       benchmarkSize(b, 8192)
+func BenchmarkHash(b *testing.B) {
+       sizes := []int{4, 8, 16, 32, 64, 256, 320, 1024, 4096, 16384}
+       for _, size := range sizes {
+               b.Run(fmt.Sprint("n=", size), func(b *testing.B) {
+                       benchmarkSize(b, size)
+               })
+       }
 }