Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: use equality signatures in hash function generation
author Keith Randall <khr@golang.org>
Fri, 5 Dec 2025 01:27:02 +0000 (17:27 -0800)
committer Keith Randall <khr@golang.org>
Sat, 24 Jan 2026 04:58:13 +0000 (20:58 -0800)
There aren't a huge number of generated hash functions, so this probably
won't save a whole lot of memory. But it means we can clean up a bunch
of code by basing equality and hashing on the same underlying infrastructure.

Change-Id: I36ed1e49044fecb33120d8736f1c0403a4a2554e
Reviewed-on: https://go-review.googlesource.com/c/go/+/727500
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/reflectdata/alg.go
src/runtime/alg.go

index bf52fb4b718a39a9e7656f12239c12e12fddd204..872a37c21a7e71718afedd2391ffdb7702d8f513 100644 (file)
@@ -11,7 +11,6 @@ import (
        "strings"
 
        "cmd/compile/internal/base"
-       "cmd/compile/internal/compare"
        "cmd/compile/internal/ir"
        "cmd/compile/internal/objw"
        "cmd/compile/internal/typecheck"
@@ -50,105 +49,91 @@ func AlgType(t *types.Type) types.AlgKind {
 
 // genhash returns a symbol which is the closure used to compute
 // the hash of a value of type t.
-// Note: the generated function must match runtime.typehash exactly.
 func genhash(t *types.Type) *obj.LSym {
-       switch AlgType(t) {
-       default:
-               // genhash is only called for types that have equality
-               base.Fatalf("genhash %v", t)
-       case types.AMEM0:
+       return genhashSig(eqSignature(t))
+}
+
+func genhashSig(sig string) *obj.LSym {
+       if len(sig) > 0 && sig[0] == sigAlign {
+               _, sig = parseNum(sig[1:])
+       }
+       switch sig {
+       case "":
                return sysClosure("memhash0")
-       case types.AMEM8:
+       case string(sigMemory) + "1":
                return sysClosure("memhash8")
-       case types.AMEM16:
+       case string(sigMemory) + "2":
                return sysClosure("memhash16")
-       case types.AMEM32:
+       case string(sigMemory) + "4":
                return sysClosure("memhash32")
-       case types.AMEM64:
+       case string(sigMemory) + "8":
                return sysClosure("memhash64")
-       case types.AMEM128:
+       case string(sigMemory) + "16":
                return sysClosure("memhash128")
-       case types.ASTRING:
+       case string(sigString):
                return sysClosure("strhash")
-       case types.AINTER:
+       case string(sigIface):
                return sysClosure("interhash")
-       case types.ANILINTER:
+       case string(sigEface):
                return sysClosure("nilinterhash")
-       case types.AFLOAT32:
+       case string(sigFloat32):
                return sysClosure("f32hash")
-       case types.AFLOAT64:
+       case string(sigFloat64):
                return sysClosure("f64hash")
-       case types.ACPLX64:
+       case string(sigFloat32) + string(sigFloat32):
                return sysClosure("c64hash")
-       case types.ACPLX128:
+       case string(sigFloat64) + string(sigFloat64):
                return sysClosure("c128hash")
-       case types.AMEM:
-               // For other sizes of plain memory, we build a closure
-               // that calls memhash_varlen. The size of the memory is
-               // encoded in the first slot of the closure.
-               closure := TypeLinksymLookup(fmt.Sprintf(".hashfunc%d", t.Size()))
-               if len(closure.P) > 0 { // already generated
-                       return closure
-               }
-               if memhashvarlen == nil {
-                       memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
-               }
-               ot := 0
-               ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
-               ot = objw.Uintptr(closure, ot, uint64(t.Size())) // size encoded in closure
-               objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
-               return closure
-       case types.ASPECIAL:
-               break
        }
 
-       closure := TypeLinksymPrefix(".hashfunc", t)
+       closure := TypeLinksymLookup(".hashfunc." + sig)
        if len(closure.P) > 0 { // already generated
                return closure
        }
 
-       // Generate hash functions for subtypes.
-       // There are cases where we might not use these hashes,
-       // but in that case they will get dead-code eliminated.
-       // (And the closure generated by genhash will also get
-       // dead-code eliminated, as we call the subtype hashers
-       // directly.)
-       switch t.Kind() {
-       case types.TARRAY:
-               genhash(t.Elem())
-       case types.TSTRUCT:
-               for _, f := range t.Fields() {
-                       genhash(f.Type)
+       if sig[0] == sigMemory {
+               n, rest := parseNum(sig[1:])
+               if rest == "" {
+                       // Just M%d. We can make a memhash_varlen closure.
+                       // The size of the memory region to hash is encoded in the closure.
+                       if memhashvarlen == nil {
+                               memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
+                       }
+                       ot := 0
+                       ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
+                       ot = objw.Uintptr(closure, ot, uint64(n)) // size encoded in closure
+                       objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
+                       return closure
                }
        }
 
        if base.Flag.LowerR != 0 {
-               fmt.Printf("genhash %v %v\n", closure, t)
+               fmt.Printf("genhash %s\n", sig)
        }
 
-       fn := hashFunc(t)
+       fn := hashFunc(sig)
 
        // Build closure. It doesn't close over any variables, so
        // it contains just the function pointer.
        objw.SymPtr(closure, 0, fn.Linksym(), 0)
        objw.Global(closure, int32(types.PtrSize), obj.DUPOK|obj.RODATA)
-
        return closure
 }
 
-func hashFunc(t *types.Type) *ir.Func {
-       sym := TypeSymPrefix(".hash", t)
+func hashFunc(sig string) *ir.Func {
+       sym := types.TypeSymLookup(".hash." + sig)
        if sym.Def != nil {
                return sym.Def.(*ir.Name).Func
        }
+       sig0 := sig
 
        pos := base.AutogeneratedPos // less confusing than end of input
        base.Pos = pos
 
-       // func sym(p *T, h uintptr) uintptr
+       // func sym(p unsafe.Pointer, h uintptr) uintptr
        fn := ir.NewFunc(pos, pos, sym, types.NewSignature(nil,
                []*types.Field{
-                       types.NewField(pos, typecheck.Lookup("p"), types.NewPtr(t)),
+                       types.NewField(pos, typecheck.Lookup("p"), types.Types[types.TUNSAFEPTR]),
                        types.NewField(pos, typecheck.Lookup("h"), types.Types[types.TUINTPTR]),
                },
                []*types.Field{
@@ -157,81 +142,121 @@ func hashFunc(t *types.Type) *ir.Func {
        ))
        sym.Def = fn.Nname
        fn.Pragma |= ir.Noinline // TODO(mdempsky): We need to emit this during the unified frontend instead, to allow inlining.
-
        typecheck.DeclFunc(fn)
        np := fn.Dcl[0]
        nh := fn.Dcl[1]
 
-       switch t.Kind() {
-       case types.TARRAY:
-               // An array of pure memory would be handled by the
-               // standard algorithm, so the element type must not be
-               // pure memory.
-               hashel := hashfor(t.Elem())
-
-               // for i := 0; i < nelem; i++
-               ni := typecheck.TempAt(base.Pos, ir.CurFunc, types.Types[types.TINT])
-               init := ir.NewAssignStmt(base.Pos, ni, ir.NewInt(base.Pos, 0))
-               cond := ir.NewBinaryExpr(base.Pos, ir.OLT, ni, ir.NewInt(base.Pos, t.NumElem()))
-               post := ir.NewAssignStmt(base.Pos, ni, ir.NewBinaryExpr(base.Pos, ir.OADD, ni, ir.NewInt(base.Pos, 1)))
-               loop := ir.NewForStmt(base.Pos, nil, cond, post, nil, false)
-               loop.PtrInit().Append(init)
-
-               // h = hashel(&p[i], h)
-               call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-
-               nx := ir.NewIndexExpr(base.Pos, np, ni)
-               nx.SetBounded(true)
-               na := typecheck.NodAddr(nx)
-               call.Args.Append(na)
-               call.Args.Append(nh)
-               loop.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
-
-               fn.Body.Append(loop)
+       // Skip alignment, hash functions can handle unaligned data.
+       if len(sig) > 0 && sig[0] == sigAlign {
+               _, sig = parseNum(sig[1:])
+       }
 
-       case types.TSTRUCT:
-               // Walk the struct using memhash for runs of AMEM
-               // and calling specific hash functions for the others.
-               for i, fields := 0, t.Fields(); i < len(fields); {
-                       f := fields[i]
+       // offset from np that we're currently working on
+       var off int64
 
-                       // Skip blank fields.
-                       if f.Sym.IsBlank() {
-                               i++
-                               continue
-                       }
+       // Return np+off cast to a t (t must be a pointer-y type).
+       ptr := func(t *types.Type) ir.Node {
+               c := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(off))
+               p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, c)
+               return ir.NewConvExpr(pos, ir.OCONVNOP, t, p)
+       }
+       // hash data of type t at np+off.
+       // Increment off by the size of t.
+       hash := func(t *types.Type) {
+               p := ptr(t.PtrTo())
+               hashFn := hashfor(t)
+               call := ir.NewCallExpr(pos, ir.OCALL, hashFn, []ir.Node{p, nh})
+               fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+               off += t.Size()
+       }
 
-                       // Hash non-memory fields with appropriate hash function.
-                       if !compare.IsRegularMemory(f.Type) {
-                               hashel := hashfor(f.Type)
-                               call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-                               na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-                               call.Args.Append(na)
-                               call.Args.Append(nh)
-                               fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
+       for len(sig) > 0 {
+               kind := sig[0]
+               sig = sig[1:]
+               switch kind {
+               case sigMemory:
+                       var n int64
+                       n, sig = parseNum(sig)
+                       switch {
+                       case n == 4:
+                               p := ptr(types.Types[types.TUNSAFEPTR])
+                               memhash := typecheck.LookupRuntime("memhash32")
+                               call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+                               fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+                       case n == 8:
+                               p := ptr(types.Types[types.TUNSAFEPTR])
+                               memhash := typecheck.LookupRuntime("memhash64")
+                               call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+                               fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+                       default:
+                               p := ptr(types.Types[types.TUINT8].PtrTo())
+                               memhash := typecheck.LookupRuntime("memhash", types.Types[types.TUINT8])
+                               size := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(n))
+                               call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh, size})
+                               fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+                       }
+                       off += n
+               case sigFloat32:
+                       hash(types.Types[types.TFLOAT32])
+               case sigFloat64:
+                       hash(types.Types[types.TFLOAT64])
+               case sigString:
+                       hash(types.Types[types.TSTRING])
+               case sigEface:
+                       hash(types.NewInterface(nil))
+               case sigIface:
+                       // arg kinda hacky. TODO: clean this up.
+                       hash(types.NewInterface([]*types.Field{types.NewField(pos, typecheck.Lookup("A"), types.Types[types.TBOOL])}))
+               case sigSkip:
+                       var n int64
+                       n, sig = parseNum(sig)
+                       off += n
+               case sigArrayStart:
+                       var n int64
+                       n, sig = parseNum(sig)
+                       // Find matching closing square bracket.
+                       i := 0
+                       depth := 1
+               findEndSquareBracket:
+                       for {
+                               if i == len(sig) {
+                                       base.Fatalf("mismatched brackets in %s", sig0)
+                               }
+                               switch sig[i] {
+                               case sigArrayStart:
+                                       depth++
+                               case sigArrayEnd:
+                                       depth--
+                                       if depth == 0 {
+                                               break findEndSquareBracket
+                                       }
+                               }
                                i++
-                               continue
                        }
+                       elemSig := sig[:i]
+                       elemSize := sigSize(elemSig)
+                       sig = sig[i+1:] // remaining signature after array
 
-                       // Otherwise, hash a maximal length run of raw memory.
-                       size, next := compare.Memrun(t, i)
-
-                       // h = hashel(&p.first, size, h)
-                       hashel := hashmem(f.Type)
-                       call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-                       na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-                       call.Args.Append(na)
-                       call.Args.Append(nh)
-                       call.Args.Append(ir.NewInt(base.Pos, size))
-                       fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
+                       // Loop N times, calling hash function for the element.
+                       //     for i := off; i < off + N*elemSize; i += elemSize {
+                       //         h = elemfn(p+i, h)
+                       //     }
+                       elemFn := hashFunc(elemSig).Nname
+                       idx := typecheck.TempAt(pos, ir.CurFunc, types.Types[types.TUINTPTR])
+                       init := ir.NewAssignStmt(pos, idx, ir.NewInt(pos, off))
+                       cond := ir.NewBinaryExpr(pos, ir.OLT, idx, ir.NewInt(pos, off+n*elemSize))
+                       post := ir.NewAssignStmt(pos, idx, ir.NewBinaryExpr(pos, ir.OADD, idx, ir.NewInt(pos, elemSize)))
 
-                       i = next
+                       p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, idx)
+                       call := typecheck.Call(pos, elemFn, []ir.Node{p, nh}, false)
+                       as := ir.NewAssignStmt(pos, nh, call)
+                       loop := ir.NewForStmt(pos, init, cond, post, []ir.Node{as}, false)
+                       fn.Body.Append(loop)
+                       off += n * elemSize
                }
        }
 
-       r := ir.NewReturnStmt(base.Pos, nil)
-       r.Results.Append(nh)
-       fn.Body.Append(r)
+       fn.Body.Append(ir.NewReturnStmt(pos, []ir.Node{nh}))
 
        if base.Flag.LowerR != 0 {
                ir.DumpList("genhash body", fn.Body)
@@ -246,7 +271,6 @@ func hashFunc(t *types.Type) *ir.Func {
        })
 
        fn.SetNilCheckDisabled(true)
-
        return fn
 }
 
@@ -257,8 +281,9 @@ func runtimeHashFor(name string, t *types.Type) *ir.Name {
 // hashfor returns the function to compute the hash of a value of type t.
 func hashfor(t *types.Type) *ir.Name {
        switch types.AlgType(t) {
-       case types.AMEM:
-               base.Fatalf("hashfor with AMEM type")
+       default:
+               base.Fatalf("hashfor with bad type %v", t)
+               return nil
        case types.AINTER:
                return runtimeHashFor("interhash", t)
        case types.ANILINTER:
@@ -274,9 +299,6 @@ func hashfor(t *types.Type) *ir.Name {
        case types.ACPLX128:
                return runtimeHashFor("c128hash", t)
        }
-
-       fn := hashFunc(t)
-       return fn.Nname
 }
 
 // sysClosure returns a closure which will call the
@@ -383,8 +405,6 @@ func geneqSig(sig string) *obj.LSym {
        return closure
 }
 
-// TODO: generate hash function from signatures also?
-// They are slightly different, at least at the moment.
 func eqFunc(sig string) *ir.Func {
        sym := types.TypeSymLookup(".eq." + sig)
        if sym.Def != nil {
index c5951dc20b8a53b17c792a2af9052a5f0a82410e..9b726b21803744bb76ec6a50f09f1a9710f9458c 100644 (file)
@@ -199,8 +199,6 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
 // is slower but more general and is used for hashing interface types
 // (called from interhash or nilinterhash, above) or for hashing in
 // maps generated by reflect.MapOf (reflect_typehash, below).
-// Note: this function must match the compiler generated
-// functions exactly. See issue 37716.
 //
 // typehash should be an internal detail,
 // but widely used packages access it using linkname.