// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package hash/maphash provides hash functions on byte sequences. These
-// hash functions are intended to be used to implement hash tables or
+// Package maphash provides hash functions on byte sequences.
+// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
-// sequences to a uniform distribution of integers. The hash functions
-// are collision-resistant but are not cryptographically secure (use
-// one of the hash functions in crypto/* if you need that).
+// sequences to a uniform distribution of integers.
//
-// The produced hashes depend only on the sequence of bytes provided
-// to the Hash object, not on the way in which they are provided. For
-// example, the calls
-// h.AddString("foo")
-// h.AddBytes([]byte{'f','o','o'})
-// h.AddByte('f'); h.AddByte('o'); h.AddByte('o')
-// will all have the same effect.
-//
-// Two Hash instances in the same process using the same seed
-// behave identically.
-//
-// Two Hash instances with the same seed in different processes are
-// not guaranteed to behave identically, even if the processes share
-// the same binary.
-//
-// Hashes are intended to be collision-resistant, even for situations
-// where an adversary controls the byte sequences being hashed.
-// All bits of the Hash result are close to uniformly and
-// independently distributed, so can be safely restricted to a range
-// using bit masking, shifting, or modular arithmetic.
+// The hash functions are collision-resistant but not cryptographically secure.
+// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash
-import (
- "unsafe"
-)
+import "unsafe"
-// A Seed controls the behavior of a Hash. Two Hash objects with the
-// same seed in the same process will behave identically. Two Hash
-// objects with different seeds will very likely behave differently.
+// A Seed is a random value that selects the specific hash function
+// computed by a Hash. If two Hashes use the same Seeds, they
+// will compute the same hash values for any given input.
+// If two Hashes use different Seeds, they are very likely to compute
+// distinct hash values for any given input.
+//
+// A Seed must be initialized by calling MakeSeed.
+// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
+//
+// Each Seed value is local to a single process and cannot be serialized
+// or otherwise recreated in a different process.
type Seed struct {
s uint64
}
-// A Hash object is used to compute the hash of a byte sequence.
+// A Hash computes a seeded hash of a byte sequence.
+//
+// The zero Hash is a valid Hash ready to use.
+// A zero Hash chooses a random seed for itself during
+// the first call to a Reset, Write, Seed, Sum64, or Seed method.
+// For control over the seed, use SetSeed.
+//
+// The computed hash values depend only on the initial seed and
+// the sequence of bytes provided to the Hash object, not on the way
+// in which the bytes are provided. For example, the three sequences
+//
+// h.Write([]byte{'f','o','o'})
+// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
+// h.WriteString("foo")
+//
+// all have the same effect.
+//
+// Hashes are intended to be collision-resistant, even for situations
+// where an adversary controls the byte sequences being hashed.
+//
+// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
+// If multiple goroutines must compute the same seeded hash,
+// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
- seed Seed // initial seed used for this hash
- state Seed // current hash of all flushed bytes
- buf [64]byte // unflushed byte buffer
- n int // number of unflushed bytes
+ _ [0]func() // not comparable
+ seed Seed // initial seed used for this hash
+ state Seed // current hash of all flushed bytes
+ buf [64]byte // unflushed byte buffer
+ n int // number of unflushed bytes
+}
+
+// initSeed seeds the hash if necessary.
+// initSeed is called lazily before any operation that actually uses h.seed/h.state.
+// Note that this does not include Write/WriteByte/WriteString in the case
+// where they only add to h.buf. (If they write too much, they call h.flush,
+// which does call h.initSeed.)
+func (h *Hash) initSeed() {
+ if h.seed.s == 0 {
+ h.SetSeed(MakeSeed())
+ }
}
-// AddByte adds b to the sequence of bytes hashed by h.
-func (h *Hash) AddByte(b byte) {
+// WriteByte adds b to the sequence of bytes hashed by h.
+// It never fails; the error result is for implementing io.ByteWriter.
+func (h *Hash) WriteByte(b byte) error {
if h.n == len(h.buf) {
h.flush()
}
h.buf[h.n] = b
h.n++
+ return nil
}
-// AddBytes adds b to the sequence of bytes hashed by h.
-func (h *Hash) AddBytes(b []byte) {
+// Write adds b to the sequence of bytes hashed by h.
+// It always writes all of b and never fails; the count and error result are for implementing io.Writer.
+func (h *Hash) Write(b []byte) (int, error) {
+ size := len(b)
for h.n+len(b) > len(h.buf) {
k := copy(h.buf[h.n:], b)
h.n = len(h.buf)
h.flush()
}
h.n += copy(h.buf[h.n:], b)
+ return size, nil
}
-// AddString adds the bytes of s to the sequence of bytes hashed by h.
-func (h *Hash) AddString(s string) {
+// WriteString adds the bytes of s to the sequence of bytes hashed by h.
+// It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
+func (h *Hash) WriteString(s string) (int, error) {
+ size := len(s)
for h.n+len(s) > len(h.buf) {
k := copy(h.buf[h.n:], s)
h.n = len(h.buf)
h.flush()
}
h.n += copy(h.buf[h.n:], s)
+ return size, nil
}
-// Seed returns the seed value specified in the most recent call to
-// SetSeed, or the initial seed if SetSeed was never called.
+// Seed returns h's seed value.
func (h *Hash) Seed() Seed {
+ h.initSeed()
return h.seed
}
-// SetSeed sets the seed used by h. Two Hash objects with the same
-// seed in the same process will behave identically. Two Hash objects
-// with different seeds will very likely behave differently. Any
-// bytes added to h previous to this call will be discarded.
+// SetSeed sets h to use seed, which must have been returned by MakeSeed
+// or by another Hash's Seed method.
+// Two Hash objects with the same seed behave identically.
+// Two Hash objects with different seeds will very likely behave differently.
+// Any bytes added to h before this call will be discarded.
func (h *Hash) SetSeed(seed Seed) {
+ if seed.s == 0 {
+ panic("maphash: use of uninitialized Seed")
+ }
h.seed = seed
h.state = seed
h.n = 0
// Reset discards all bytes added to h.
// (The seed remains the same.)
func (h *Hash) Reset() {
+ h.initSeed()
h.state = h.seed
h.n = 0
}
// precondition: buffer is full.
func (h *Hash) flush() {
if h.n != len(h.buf) {
- panic("flush of partially full buffer")
+ panic("maphash: flush of partially full buffer")
}
+ h.initSeed()
h.state.s = rthash(h.buf[:], h.state.s)
h.n = 0
}
-// Hash returns a value which depends on h's seed and the sequence of
-// bytes added to h (since the last call to Reset or SetSeed).
-func (h *Hash) Hash() uint64 {
+// Sum64 returns h's current 64-bit value, which depends on
+// h's seed and the sequence of bytes added to h since the
+// last call to Reset or SetSeed.
+//
+// All bits of the Sum64 result are close to uniformly and
+// independently distributed, so it can be safely reduced
+// by using bit masking, shifting, or modular arithmetic.
+func (h *Hash) Sum64() uint64 {
+ h.initSeed()
return rthash(h.buf[:h.n], h.state.s)
}
-// MakeSeed returns a Seed initialized using the bits in s.
-// Two seeds generated with the same s are guaranteed to be equal.
-// Two seeds generated with different s are very likely to be different.
-// TODO: disallow this? See Alan's comment in the issue.
-func MakeSeed(s uint64) Seed {
- return Seed{s: s}
-}
-
-// New returns a new Hash object. Different hash objects allocated by
-// this function will very likely have different seeds.
-func New() *Hash {
- s1 := uint64(runtime_fastrand())
- s2 := uint64(runtime_fastrand())
- seed := Seed{s: s1<<32 + s2}
- return &Hash{
- seed: seed,
- state: seed,
+// MakeSeed returns a new random seed.
+func MakeSeed() Seed {
+ var s1, s2 uint64
+ for {
+ s1 = uint64(runtime_fastrand())
+ s2 = uint64(runtime_fastrand())
+ // We use seed 0 to indicate an uninitialized seed/hash,
+ // so keep trying until we get a non-zero seed.
+ if s1|s2 != 0 {
+ break
+ }
}
+ return Seed{s: s1<<32 + s2}
}
//go:linkname runtime_fastrand runtime.fastrand
}
lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))
hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b)))
- // TODO: mix lo/hi? Get 64 bits some other way?
return uint64(hi)<<32 | uint64(lo)
}
//go:linkname runtime_memhash runtime.memhash
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
-// Wrapper functions so that a hash/maphash.Hash implements
-// the hash.Hash and hash.Hash64 interfaces.
-
-func (h *Hash) Write(b []byte) (int, error) {
- h.AddBytes(b)
- return len(b), nil
-}
+// Sum appends the hash's current 64-bit value to b.
+// It exists for implementing hash.Hash.
+// For direct calls, it is more efficient to use Sum64.
func (h *Hash) Sum(b []byte) []byte {
- x := h.Hash()
+ x := h.Sum64()
return append(b,
byte(x>>0),
byte(x>>8),
byte(x>>48),
byte(x>>56))
}
-func (h *Hash) Sum64() uint64 {
- return h.Hash()
-}
-func (h *Hash) Size() int { return 8 }
+
+// Size returns h's hash value size, 8 bytes.
+func (h *Hash) Size() int { return 8 }
+
+// BlockSize returns h's block size.
func (h *Hash) BlockSize() int { return len(h.buf) }
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package maphash_test
+package maphash
import (
"hash"
- "hash/maphash"
"testing"
)
func TestUnseededHash(t *testing.T) {
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
- h := maphash.New()
- m[h.Hash()] = struct{}{}
+ h := new(Hash)
+ m[h.Sum64()] = struct{}{}
}
if len(m) < 900 {
t.Errorf("empty hash not sufficiently random: got %d, want 1000", len(m))
}
func TestSeededHash(t *testing.T) {
- s := maphash.MakeSeed(1234)
+ s := MakeSeed()
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
- h := maphash.New()
+ h := new(Hash)
h.SetSeed(s)
- m[h.Hash()] = struct{}{}
+ m[h.Sum64()] = struct{}{}
}
if len(m) != 1 {
t.Errorf("seeded hash is random: got %d, want 1", len(m))
func TestHashGrouping(t *testing.T) {
b := []byte("foo")
- h1 := maphash.New()
- h2 := maphash.New()
+ h1 := new(Hash)
+ h2 := new(Hash)
h2.SetSeed(h1.Seed())
- h1.AddBytes(b)
+ h1.Write(b)
for _, x := range b {
- h2.AddByte(x)
+ err := h2.WriteByte(x)
+ if err != nil {
+ t.Fatalf("WriteByte: %v", err)
+ }
}
- if h1.Hash() != h2.Hash() {
+ if h1.Sum64() != h2.Sum64() {
t.Errorf("hash of \"foo\" and \"f\",\"o\",\"o\" not identical")
}
}
func TestHashBytesVsString(t *testing.T) {
s := "foo"
b := []byte(s)
- h1 := maphash.New()
- h2 := maphash.New()
+ h1 := new(Hash)
+ h2 := new(Hash)
h2.SetSeed(h1.Seed())
- h1.AddString(s)
- h2.AddBytes(b)
- if h1.Hash() != h2.Hash() {
- t.Errorf("hash of string and byts not identical")
+ n1, err1 := h1.WriteString(s)
+ if n1 != len(s) || err1 != nil {
+ t.Fatalf("WriteString(s) = %d, %v, want %d, nil", n1, err1, len(s))
+ }
+ n2, err2 := h2.Write(b)
+ if n2 != len(b) || err2 != nil {
+ t.Fatalf("Write(b) = %d, %v, want %d, nil", n2, err2, len(b))
+ }
+ if h1.Sum64() != h2.Sum64() {
+ t.Errorf("hash of string and bytes not identical")
}
}
const N = 10
m := map[uint64]struct{}{}
for i := 0; i < N; i++ {
- h := maphash.New()
- h.AddString("foo")
- m[h.Hash()>>32] = struct{}{}
+ h := new(Hash)
+ h.WriteString("foo")
+ m[h.Sum64()>>32] = struct{}{}
}
if len(m) < N/2 {
t.Errorf("from %d seeds, wanted at least %d different hashes; got %d", N, N/2, len(m))
}
// Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
-var _ hash.Hash = &maphash.Hash{}
-var _ hash.Hash64 = &maphash.Hash{}
+var _ hash.Hash = &Hash{}
+var _ hash.Hash64 = &Hash{}
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package maphash_test
+package maphash
import (
"fmt"
- "hash/maphash"
"math"
"math/rand"
"runtime"
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
+var fixedSeed = MakeSeed()
+
// Sanity checks.
// hash should not depend on values outside key.
// hash should not depend on alignment.
randBytes(r, b[:])
randBytes(r, c[:])
copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
- if bytesHash(b[PAD:PAD+n], 0) != bytesHash(c[PAD+i:PAD+i+n], 0) {
+ if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
t.Errorf("hash depends on bytes outside key")
}
}
}
}
-func bytesHash(b []byte, seed uint64) uint64 {
- h := maphash.New()
- h.SetSeed(maphash.MakeSeed(seed))
- h.AddBytes(b)
- return h.Hash()
+func bytesHash(b []byte) uint64 {
+ var h Hash
+ h.SetSeed(fixedSeed)
+ h.Write(b)
+ return h.Sum64()
}
-func stringHash(s string, seed uint64) uint64 {
- h := maphash.New()
- h.SetSeed(maphash.MakeSeed(seed))
- h.AddString(s)
- return h.Hash()
+func stringHash(s string) uint64 {
+ var h Hash
+ h.SetSeed(fixedSeed)
+ h.WriteString(s)
+ return h.Sum64()
}
const hashSize = 64
s.n++
}
func (s *hashSet) addS(x string) {
- s.add(stringHash(x, 0))
+ s.add(stringHash(x))
}
func (s *hashSet) addB(x []byte) {
- s.add(bytesHash(x, 0))
+ s.add(bytesHash(x))
}
-func (s *hashSet) addS_seed(x string, seed uint64) {
- s.add(stringHash(x, seed))
+func (s *hashSet) addS_seed(x string, seed Seed) {
+ var h Hash
+ h.SetSeed(seed)
+ h.WriteString(x)
+ s.add(h.Sum64())
}
func (s *hashSet) check(t *testing.T) {
const SLOP = 10.0
k.b[i>>3] ^= byte(1 << uint(i&7))
}
func (k *bytesKey) hash() uint64 {
- return bytesHash(k.b, 0)
+ return bytesHash(k.b)
}
func (k *bytesKey) name() string {
return fmt.Sprintf("bytes%d", len(k.b))
const N = 100000
s := "hello"
for i := 0; i < N; i++ {
- h.addS_seed(s, uint64(i))
- h.addS_seed(s, uint64(i)<<32) // make sure high bits are used
+ h.addS_seed(s, Seed{s: uint64(i + 1)})
+ h.addS_seed(s, Seed{s: uint64(i+1) << 32}) // make sure high bits are used
}
h.check(t)
}