cmd/compile,internal/bytealg: add MemEq intrinsic for runtime.memequal
author     Alexander Musman <alexander.musman@gmail.com>
Sat, 5 Jul 2025 20:16:36 +0000 (23:16 +0300)
committer  Gopher Robot <gobot@golang.org>
Wed, 26 Nov 2025 17:58:51 +0000 (09:58 -0800)
Introduce a new MemEq SSA operation for runtime.memequal, initially
implemented for arm64. The change adds opt rules that work on MemEq
(following the existing rules for calls to runtime.memequal), and a
lowered form, LoweredMemEq, which targets (arm64 as well as others)
may lower differently for more constant-size cases in the future.
The new MemEq SSA operation has no memory result, which allows CSE
of load operations around it.
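
The effect can be seen in the new codegen test added by this CL
(test/codegen/memcse.go, at the end of the diff). An annotated copy
for illustration:

    func loadsAroundMemEqual(p *int, s1, s2 string) (int, bool) {
        x := *p        // first load of *p
        eq := s1 == s2 // becomes a MemEq value (a memequal call on arm64) with no memory result
        y := *p        // CSE'd with the first load, since MemEq produces no new memory state
        // asmcheck expects the difference to fold to zero: arm64:"MOVD ZR, R0"
        return x - y, eq
    }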

Code size difference (for arm64 linux):

Executable            Old .text  New .text     Change
-------------------------------------------------------
asm                     1970420    1969668     -0.04%
cgo                     1741220    1740212     -0.06%
compile                 8956756    8959428     +0.03%
cover                   1879332    1878772     -0.03%
link                    2574116    2572660     -0.06%
preprofile               867124     866820     -0.04%
vet                     2890404    2888596     -0.06%

Change-Id: I6ab507929b861884d17d5818cfbd152cf7879751
Reviewed-on: https://go-review.googlesource.com/c/go/+/686655
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
15 files changed:
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ir/symtab.go
src/cmd/compile/internal/ssa/_gen/ARM64.rules
src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
src/cmd/compile/internal/ssa/_gen/generic.rules
src/cmd/compile/internal/ssa/_gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/regalloc.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewritegeneric.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/comparisons.go
test/codegen/memcse.go [new file with mode: 0644]

diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 43ecb6b4b715b42dba957eab28d3d3d6f25a19ae..74371104a310ed8abf56381186384f2101502287 100644
@@ -1322,6 +1322,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Name = obj.NAME_EXTERN
                // AuxInt encodes how many buffer entries we need.
                p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
+       case ssa.OpARM64LoweredMemEq:
+               p := s.Prog(obj.ACALL)
+               p.To.Type = obj.TYPE_MEM
+               p.To.Name = obj.NAME_EXTERN
+               p.To.Sym = ir.Syms.Memequal
 
        case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
                // Compute the constant we put in the PCData entry for this call.
diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go
index 4b5bf17a3dea1658635d2ce04edff6ce47239a24..32297354644a296b39138ce2bf37699cf695076a 100644
@@ -40,6 +40,7 @@ type symsStruct struct {
        MallocGCSmallScanNoHeader [27]*obj.LSym
        MallocGCTiny              [16]*obj.LSym
        Memmove                   *obj.LSym
+       Memequal                  *obj.LSym
        Msanread                  *obj.LSym
        Msanwrite                 *obj.LSym
        Msanmove                  *obj.LSym
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 53bb35d2897b940acde0499ce10ecac2d431a0ab..4ade43f1a148a36d1c5e28784f49289b9e1d22ba 100644
 (GetClosurePtr ...) => (LoweredGetClosurePtr ...)
 (GetCallerSP   ...) => (LoweredGetCallerSP   ...)
 (GetCallerPC   ...) => (LoweredGetCallerPC   ...)
+(MemEq ...) => (LoweredMemEq ...)
 
 // Absorb pseudo-ops into blocks.
 (If (Equal         cc) yes no) => (EQ cc yes no)
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
index b710724cca1b466bf181672a1ae5e081dc479f76..c84b24cad1217863ca30403db497392454f64e67 100644
@@ -534,7 +534,8 @@ func init() {
                {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                         // call fn by pointer.  arg0=codeptr, last arg=mem, auxint=argsize, returns mem
 
                // pseudo-ops
-               {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil.  arg1=mem.
+               {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true},                                                                                                                                                      // panic if arg0 is nil.  arg1=mem.
+               {name: "LoweredMemEq", argLength: 4, reg: regInfo{inputs: []regMask{buildReg("R0"), buildReg("R1"), buildReg("R2")}, outputs: []regMask{buildReg("R0")}, clobbers: callerSave}, typ: "Bool", faultOnNilArg0: true, faultOnNilArg1: true, clobberFlags: true, call: true}, // arg0, arg1 - pointers to memory, arg2=size, arg3=mem.
 
                {name: "Equal", argLength: 1, reg: readflags},            // bool, true flags encode x==y false otherwise.
                {name: "NotEqual", argLength: 1, reg: readflags},         // bool, true flags encode x!=y false otherwise.
diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules
index 90ff0b74eca19447046dc1c61d26131f8b34880e..7710f6f2097b7abd571d777e8b6fd87ea53961e8 100644
   && isSamePtr(p, q)
   => (MakeResult (ConstBool <typ.Bool> [true]) mem)
 
+(MemEq sptr tptr (Const64 [1]) mem)
+  => (Eq8 (Load <typ.Int8> sptr mem) (Load <typ.Int8> tptr mem))
+
+(Load <typ.Int8> sptr:(Addr {scon} (SB)) mem)
+  && symIsRO(scon)
+  => (Const8 <typ.Int8> [int8(read8(scon,0))])
+
+(MemEq sptr tptr (Const64 [2]) mem)
+  && canLoadUnaligned(config)
+  => (Eq16 (Load <typ.Int16> sptr mem) (Load <typ.Int16> tptr mem))
+
+(Load <typ.Int16> sptr:(Addr {scon} (SB)) mem)
+  && symIsRO(scon)
+  => (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])
+
+(MemEq sptr tptr (Const64 [4]) mem)
+  && canLoadUnaligned(config)
+  => (Eq32 (Load <typ.Int32> sptr mem) (Load <typ.Int32> tptr mem))
+
+(Load <typ.Int32> sptr:(Addr {scon} (SB)) mem)
+  && symIsRO(scon)
+  => (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])
+
+(MemEq sptr tptr (Const64 [8]) mem)
+  && canLoadUnaligned(config) && config.PtrSize == 8
+  => (Eq64 (Load <typ.Int64> sptr mem) (Load <typ.Int64> tptr mem))
+
+(Load <typ.Int64> sptr:(Addr {scon} (SB)) mem)
+  && symIsRO(scon)
+  => (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])
+
+(MemEq _ _ (Const64 [0]) _) => (ConstBool <typ.Bool> [true])
+
+(MemEq p q _ _) && isSamePtr(p, q) => (ConstBool <typ.Bool> [true])
+
 // Turn known-size calls to memclrNoHeapPointers into a Zero.
 // Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
 (SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
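
For constant sizes the rules above avoid the call entirely. A hedged sketch
(hypothetical function, not part of this CL) of source code that can reach
the size-8 rule, assuming canLoadUnaligned(config) and config.PtrSize == 8
as on arm64:

    func eqFirst8(a, b string) bool {
        // Both operands have constant length 8, so the comparison reaches
        // (MemEq p q (Const64 [8]) mem), which the rule above rewrites to
        // (Eq64 (Load <Int64> p mem) (Load <Int64> q mem)) instead of a
        // runtime.memequal call.
        return a[:8] == b[:8]
    }
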
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index ce01f2c0e3dc967b87e85bd011a38505bde84aa6..8637133e5f77e9d2e406bcf9791145ba6b5bd976 100644
@@ -679,6 +679,9 @@ var genericOps = []opData{
        {name: "PrefetchCache", argLength: 2, hasSideEffects: true},         // Do prefetch arg0 to cache. arg0=addr, arg1=memory.
        {name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory.
 
+       // Helper instruction which is semantically equivalent to calling runtime.memequal, but some targets may prefer to custom lower it later, e.g. for specific constant sizes.
+       {name: "MemEq", argLength: 4, commutative: true, typ: "Bool"}, // arg0=ptr0, arg1=ptr1, arg2=size, arg3=memory.
+
        // SIMD
        {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
 
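For reference, a hedged model of the semantics the new op carries (the real
runtime.memequal is per-architecture assembly; the package and function name
below are made up for illustration):

    package scratch

    import "unsafe"

    // memequalModel spells out what MemEq computes: byte-wise equality of two
    // size-byte memory regions. Op arguments map as arg0=a, arg1=b, arg2=size;
    // the memory argument only orders the reads and yields no memory result.
    func memequalModel(a, b unsafe.Pointer, size uintptr) bool {
        for i := uintptr(0); i < size; i++ {
            if *(*byte)(unsafe.Add(a, i)) != *(*byte)(unsafe.Add(b, i)) {
                return false
            }
        }
        return true
    }
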
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 966d15b83cae475e8a5d99cba09037183236e2c1..fee0228e7f56913ec9e628eef5806267613b968d 100644
@@ -4202,6 +4202,7 @@ const (
        OpARM64CALLclosure
        OpARM64CALLinter
        OpARM64LoweredNilCheck
+       OpARM64LoweredMemEq
        OpARM64Equal
        OpARM64NotEqual
        OpARM64LessThan
@@ -5916,6 +5917,7 @@ const (
        OpClobberReg
        OpPrefetchCache
        OpPrefetchCacheStreamed
+       OpMemEq
        OpZeroSIMD
        OpCvt16toMask8x16
        OpCvt32toMask8x32
@@ -65520,6 +65522,25 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "LoweredMemEq",
+               argLen:         4,
+               clobberFlags:   true,
+               call:           true,
+               faultOnNilArg0: true,
+               faultOnNilArg1: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1}, // R0
+                               {1, 2}, // R1
+                               {2, 4}, // R2
+                       },
+                       clobbers: 9223372035109945343, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       outputs: []outputInfo{
+                               {0, 1}, // R0
+                       },
+               },
+       },
        {
                name:   "Equal",
                argLen: 1,
@@ -85517,6 +85538,12 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:        "MemEq",
+               argLen:      4,
+               commutative: true,
+               generic:     true,
+       },
        {
                name:    "ZeroSIMD",
                argLen:  0,
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 11dd53bfc7d887b2f0250ec09d0429d2c787199f..a0257f30641e6ab9b5944b643830fc4ad0182c57 100644
@@ -897,7 +897,15 @@ func (s *regAllocState) dropIfUnused(v *Value) {
        }
        vi := &s.values[v.ID]
        r := vi.uses
-       if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
+       nextCall := s.nextCall[s.curIdx]
+       if opcodeTable[v.Op].call {
+               if s.curIdx == len(s.nextCall)-1 {
+                       nextCall = math.MaxInt32
+               } else {
+                       nextCall = s.nextCall[s.curIdx+1]
+               }
+       }
+       if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > nextCall) {
                s.freeRegs(vi.regs)
        }
 }
@@ -1036,8 +1044,11 @@ func (s *regAllocState) regalloc(f *Func) {
                                regValLiveSet.add(v.ID)
                        }
                }
-               if len(s.nextCall) < len(b.Values) {
-                       s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
+               if cap(s.nextCall) < len(b.Values) {
+                       c := cap(s.nextCall)
+                       s.nextCall = append(s.nextCall[:c], make([]int32, len(b.Values)-c)...)
+               } else {
+                       s.nextCall = s.nextCall[:len(b.Values)]
                }
                var nextCall int32 = math.MaxInt32
                for i := len(b.Values) - 1; i >= 0; i-- {
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index b3f790dbda576bd8d7e7fd52b57e4f60a34b5178..25a1c9c0fc1e8d190d9d2e451e85a06606996995 100644
@@ -840,6 +840,9 @@ func rewriteValueARM64(v *Value) bool {
        case OpMax64F:
                v.Op = OpARM64FMAXD
                return true
+       case OpMemEq:
+               v.Op = OpARM64LoweredMemEq
+               return true
        case OpMin32F:
                v.Op = OpARM64FMINS
                return true
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index d40333f0196b9b594cb12cb82368e58c3caffb6d..fea126bd4d499ee69e5b6e3c1e39ba644ee09ba0 100644
@@ -224,6 +224,8 @@ func rewriteValuegeneric(v *Value) bool {
                return rewriteValuegeneric_OpLsh8x64(v)
        case OpLsh8x8:
                return rewriteValuegeneric_OpLsh8x8(v)
+       case OpMemEq:
+               return rewriteValuegeneric_OpMemEq(v)
        case OpMod16:
                return rewriteValuegeneric_OpMod16(v)
        case OpMod16u:
@@ -11869,6 +11871,8 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (Load <t1> p1 (Store {t2} p2 x _))
        // cond: isSamePtr(p1, p2) && copyCompatibleType(t1, x.Type) && t1.Size() == t2.Size()
        // result: x
@@ -12453,6 +12457,102 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Load <typ.Int8> sptr:(Addr {scon} (SB)) mem)
+       // cond: symIsRO(scon)
+       // result: (Const8 <typ.Int8> [int8(read8(scon,0))])
+       for {
+               if v.Type != typ.Int8 {
+                       break
+               }
+               sptr := v_0
+               if sptr.Op != OpAddr {
+                       break
+               }
+               scon := auxToSym(sptr.Aux)
+               sptr_0 := sptr.Args[0]
+               if sptr_0.Op != OpSB {
+                       break
+               }
+               if !(symIsRO(scon)) {
+                       break
+               }
+               v.reset(OpConst8)
+               v.Type = typ.Int8
+               v.AuxInt = int8ToAuxInt(int8(read8(scon, 0)))
+               return true
+       }
+       // match: (Load <typ.Int16> sptr:(Addr {scon} (SB)) mem)
+       // cond: symIsRO(scon)
+       // result: (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])
+       for {
+               if v.Type != typ.Int16 {
+                       break
+               }
+               sptr := v_0
+               if sptr.Op != OpAddr {
+                       break
+               }
+               scon := auxToSym(sptr.Aux)
+               sptr_0 := sptr.Args[0]
+               if sptr_0.Op != OpSB {
+                       break
+               }
+               if !(symIsRO(scon)) {
+                       break
+               }
+               v.reset(OpConst16)
+               v.Type = typ.Int16
+               v.AuxInt = int16ToAuxInt(int16(read16(scon, 0, config.ctxt.Arch.ByteOrder)))
+               return true
+       }
+       // match: (Load <typ.Int32> sptr:(Addr {scon} (SB)) mem)
+       // cond: symIsRO(scon)
+       // result: (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])
+       for {
+               if v.Type != typ.Int32 {
+                       break
+               }
+               sptr := v_0
+               if sptr.Op != OpAddr {
+                       break
+               }
+               scon := auxToSym(sptr.Aux)
+               sptr_0 := sptr.Args[0]
+               if sptr_0.Op != OpSB {
+                       break
+               }
+               if !(symIsRO(scon)) {
+                       break
+               }
+               v.reset(OpConst32)
+               v.Type = typ.Int32
+               v.AuxInt = int32ToAuxInt(int32(read32(scon, 0, config.ctxt.Arch.ByteOrder)))
+               return true
+       }
+       // match: (Load <typ.Int64> sptr:(Addr {scon} (SB)) mem)
+       // cond: symIsRO(scon)
+       // result: (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])
+       for {
+               if v.Type != typ.Int64 {
+                       break
+               }
+               sptr := v_0
+               if sptr.Op != OpAddr {
+                       break
+               }
+               scon := auxToSym(sptr.Aux)
+               sptr_0 := sptr.Args[0]
+               if sptr_0.Op != OpSB {
+                       break
+               }
+               if !(symIsRO(scon)) {
+                       break
+               }
+               v.reset(OpConst64)
+               v.Type = typ.Int64
+               v.AuxInt = int64ToAuxInt(int64(read64(scon, 0, config.ctxt.Arch.ByteOrder)))
+               return true
+       }
        // match: (Load (Addr {s} sb) _)
        // cond: isFixedLoad(v, s, 0)
        // result: rewriteFixedLoad(v, s, sb, 0)
@@ -14767,6 +14867,124 @@ func rewriteValuegeneric_OpLsh8x8(v *Value) bool {
        }
        return false
 }
+func rewriteValuegeneric_OpMemEq(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
+       // match: (MemEq sptr tptr (Const64 [1]) mem)
+       // result: (Eq8 (Load <typ.Int8> sptr mem) (Load <typ.Int8> tptr mem))
+       for {
+               sptr := v_0
+               tptr := v_1
+               if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 1 {
+                       break
+               }
+               mem := v_3
+               v.reset(OpEq8)
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
+               v0.AddArg2(sptr, mem)
+               v1 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
+               v1.AddArg2(tptr, mem)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (MemEq sptr tptr (Const64 [2]) mem)
+       // cond: canLoadUnaligned(config)
+       // result: (Eq16 (Load <typ.Int16> sptr mem) (Load <typ.Int16> tptr mem))
+       for {
+               sptr := v_0
+               tptr := v_1
+               if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 2 {
+                       break
+               }
+               mem := v_3
+               if !(canLoadUnaligned(config)) {
+                       break
+               }
+               v.reset(OpEq16)
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
+               v0.AddArg2(sptr, mem)
+               v1 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
+               v1.AddArg2(tptr, mem)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (MemEq sptr tptr (Const64 [4]) mem)
+       // cond: canLoadUnaligned(config)
+       // result: (Eq32 (Load <typ.Int32> sptr mem) (Load <typ.Int32> tptr mem))
+       for {
+               sptr := v_0
+               tptr := v_1
+               if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 4 {
+                       break
+               }
+               mem := v_3
+               if !(canLoadUnaligned(config)) {
+                       break
+               }
+               v.reset(OpEq32)
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
+               v0.AddArg2(sptr, mem)
+               v1 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
+               v1.AddArg2(tptr, mem)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (MemEq sptr tptr (Const64 [8]) mem)
+       // cond: canLoadUnaligned(config) && config.PtrSize == 8
+       // result: (Eq64 (Load <typ.Int64> sptr mem) (Load <typ.Int64> tptr mem))
+       for {
+               sptr := v_0
+               tptr := v_1
+               if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 8 {
+                       break
+               }
+               mem := v_3
+               if !(canLoadUnaligned(config) && config.PtrSize == 8) {
+                       break
+               }
+               v.reset(OpEq64)
+               v0 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
+               v0.AddArg2(sptr, mem)
+               v1 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
+               v1.AddArg2(tptr, mem)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (MemEq _ _ (Const64 [0]) _)
+       // result: (ConstBool <typ.Bool> [true])
+       for {
+               if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 0 {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.Type = typ.Bool
+               v.AuxInt = boolToAuxInt(true)
+               return true
+       }
+       // match: (MemEq p q _ _)
+       // cond: isSamePtr(p, q)
+       // result: (ConstBool <typ.Bool> [true])
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       p := v_0
+                       q := v_1
+                       if !(isSamePtr(p, q)) {
+                               continue
+                       }
+                       v.reset(OpConstBool)
+                       v.Type = typ.Bool
+                       v.AuxInt = boolToAuxInt(true)
+                       return true
+               }
+               break
+       }
+       return false
+}
 func rewriteValuegeneric_OpMod16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index e346b00a1b3c2b4f5f2a601da283adb50586e445..17beb7b8488455a991eb793e1030d7f0ebbbd880 100644
@@ -196,6 +196,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                },
                sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X)
 
+       addF("runtime", "memequal",
+               func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                       return s.newValue4(ssa.OpMemEq, s.f.Config.Types.Bool, args[0], args[1], args[2], s.mem())
+               },
+               sys.ARM64)
+
        if cfg.goppc64 >= 10 {
                // Use only on Power10 as the new byte reverse instructions that Power10 provide
                // make it worthwhile as an intrinsic
diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go
index 0c483d49c3377a9f92c2c9cf4279184c707ecedb..91b975c913f7c037f7efe275015b50bbc7a5cf5e 100644
@@ -327,6 +327,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"arm64", "math/bits", "TrailingZeros64"}:                          struct{}{},
        {"arm64", "math/bits", "TrailingZeros8"}:                           struct{}{},
        {"arm64", "runtime", "KeepAlive"}:                                  struct{}{},
+       {"arm64", "runtime", "memequal"}:                                   struct{}{},
        {"arm64", "runtime", "publicationBarrier"}:                         struct{}{},
        {"arm64", "runtime", "slicebytetostringtmp"}:                       struct{}{},
        {"arm64", "sync", "runtime_LoadAcquintptr"}:                        struct{}{},
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 6dc150f5b2cdd35a639bae640ec95ae1dfb6508c..830e01369722d03a9f94742ac793effd31209fce 100644
@@ -141,6 +141,7 @@ func InitConfig() {
        }
        ir.Syms.MallocGC = typecheck.LookupRuntimeFunc("mallocgc")
        ir.Syms.Memmove = typecheck.LookupRuntimeFunc("memmove")
+       ir.Syms.Memequal = typecheck.LookupRuntimeFunc("memequal")
        ir.Syms.Msanread = typecheck.LookupRuntimeFunc("msanread")
        ir.Syms.Msanwrite = typecheck.LookupRuntimeFunc("msanwrite")
        ir.Syms.Msanmove = typecheck.LookupRuntimeFunc("msanmove")
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index 74a7d689da3661b5b5303373d5ff1e11055ad0b9..bcce21e4044b1271db417cc54088ee57558b0428 100644
@@ -660,6 +660,17 @@ func equalVarString8(a string) bool {
        return a[:8] == b
 }
 
+func equalVarStringNoSpill(a, b string) bool {
+       s := string("ZZZZZZZZZ")
+       // arm64:".*memequal"
+       memeq1 := a[:9] == s
+       // arm64:-".*"
+       memeq2 := s == a[:9]
+       // arm64:-"MOVB\tR0,.*SP",".*memequal"
+       memeq3 := s == b[:9]
+       return memeq1 && memeq2 && memeq3
+}
+
 func cmpToCmn(a, b, c, d int) int {
        var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int
        // arm64:`CMN`,-`CMP`
diff --git a/test/codegen/memcse.go b/test/codegen/memcse.go
new file mode 100644
index 0000000..d2eb156
--- /dev/null
@@ -0,0 +1,17 @@
+// asmcheck
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test common subexpression elimination of loads around other operations.
+
+package codegen
+
+func loadsAroundMemEqual(p *int, s1, s2 string) (int, bool) {
+       x := *p
+       eq := s1 == s2
+       y := *p
+       // arm64:"MOVD ZR, R0"
+       return x - y, eq
+}