From: Ruslan Andreev
Date: Wed, 16 Jun 2021 16:25:57 +0000 (+0000)
Subject: cmd/compile: inline memequal(x, const, sz) for small sizes
X-Git-Tag: go1.18beta1~1026
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=810b08b8ec28ea00bce4c008f7c1b48bc9f3e134;p=gostls13.git

cmd/compile: inline memequal(x, const, sz) for small sizes

This CL adds late-expanded memequal(x, const, sz) inlining for 2-, 4- and
8-byte sizes, using the same method as CL 248404. The optimization fires
about 100 times in the Go compiler (1675 occurrences reduced to 1574, so
about -6%). Also added unit tests to the test/codegen/comparisons.go file.

Updates #37275

Change-Id: Ia52808d573cb706d1da8166c5746ede26f46c5da
Reviewed-on: https://go-review.googlesource.com/c/go/+/328291
Reviewed-by: Cherry Mui
Run-TryBot: Cherry Mui
Trust: David Chase
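For illustration, this is the shape of code the new rules target (a made-up
example in the spirit of the added codegen tests, not code from this CL).
Because the constant reaches the comparison through a local variable, the
frontend still emits a runtime.memequal call, and only the late-expanded
(Const64 [8]) rule added below turns it into a single 64-bit load and Eq64
compare on 64-bit targets that allow unaligned loads:

	// hasMagic8 mirrors the new equalVarString8 codegen test: the 8-byte
	// constant operand only becomes visible to SSA once the local variable b
	// is resolved, so the comparison reaches runtime.memequal and is then
	// rewritten into one wide load plus an equality check.
	func hasMagic8(a string) bool {
		b := string("ZZZZZZZZ")
		if len(a) < 8 {
			return false
		}
		return a[:8] == b
	}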
---

diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 40db1a6ee8..6dbe9b47d0 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -2011,12 +2011,30 @@
 	=> (Invalid)
 
 // for late-expanded calls, recognize memequal applied to a single constant byte
-// TODO figure out breakeven number of bytes for this optimization.
+// Support is limited to 1, 2, 4 and 8 byte sizes.
 (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
   && isSameCall(callAux, "runtime.memequal")
   && symIsRO(scon)
   => (MakeResult (Eq8 (Load <typ.Int8> sptr mem) (Const8 <typ.Int8> [int8(read8(scon,0))])) mem)
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem)
+  && isSameCall(callAux, "runtime.memequal")
+  && symIsRO(scon)
+  && canLoadUnaligned(config)
+  => (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem)
+  && isSameCall(callAux, "runtime.memequal")
+  && symIsRO(scon)
+  && canLoadUnaligned(config)
+  => (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem)
+  && isSameCall(callAux, "runtime.memequal")
+  && symIsRO(scon)
+  && canLoadUnaligned(config) && config.PtrSize == 8
+  => (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
 // Evaluate constant address comparisons.
 (EqPtr  x x) => (ConstBool [true])
 (NeqPtr x x) => (ConstBool [false])
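One editorial aside on the rules above (not part of the patch): only the exact
sizes 1, 2, 4 and 8 are matched, so a comparison of any other small constant
length, written in the same style as the new codegen tests, is still expected
to compile to a runtime.memequal call:

	// equalVarString3 is a hypothetical counter-example: no rule matches
	// (Const64 [3]), so the memequal call presumably remains.
	func equalVarString3(a string) bool {
		b := string("ZZZ")
		return a[:3] == b
	}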
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 79f9efaebf..2fe0ca64c8 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -415,6 +415,11 @@ func isSameCall(sym interface{}, name string) bool {
 	return fn != nil && fn.String() == name
 }
 
+// canLoadUnaligned reports whether the architecture supports unaligned load operations.
+func canLoadUnaligned(c *Config) bool {
+	return c.ctxt.Arch.Alignment == 1
+}
+
 // nlz returns the number of leading zeros.
 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index a6757e0d10..fbf227562a 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -21746,6 +21746,7 @@ func rewriteValuegeneric_OpSqrt(v *Value) bool {
 }
 func rewriteValuegeneric_OpStaticLECall(v *Value) bool {
 	b := v.Block
+	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
 	// cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon)
@@ -21780,6 +21781,105 @@ func rewriteValuegeneric_OpStaticLECall(v *Value) bool {
 		v.AddArg2(v0, mem)
 		return true
 	}
+	// match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem)
+	// cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)
+	// result: (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+	for {
+		if len(v.Args) != 4 {
+			break
+		}
+		callAux := auxToCall(v.Aux)
+		mem := v.Args[3]
+		sptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpAddr {
+			break
+		}
+		scon := auxToSym(v_1.Aux)
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpSB {
+			break
+		}
+		v_2 := v.Args[2]
+		if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 2 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)) {
+			break
+		}
+		v.reset(OpMakeResult)
+		v0 := b.NewValue0(v.Pos, OpEq16, typ.Bool)
+		v1 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
+		v1.AddArg2(sptr, mem)
+		v2 := b.NewValue0(v.Pos, OpConst16, typ.Int16)
+		v2.AuxInt = int16ToAuxInt(int16(read16(scon, 0, config.ctxt.Arch.ByteOrder)))
+		v0.AddArg2(v1, v2)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem)
+	// cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)
+	// result: (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+	for {
+		if len(v.Args) != 4 {
+			break
+		}
+		callAux := auxToCall(v.Aux)
+		mem := v.Args[3]
+		sptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpAddr {
+			break
+		}
+		scon := auxToSym(v_1.Aux)
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpSB {
+			break
+		}
+		v_2 := v.Args[2]
+		if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 4 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)) {
+			break
+		}
+		v.reset(OpMakeResult)
+		v0 := b.NewValue0(v.Pos, OpEq32, typ.Bool)
+		v1 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
+		v1.AddArg2(sptr, mem)
+		v2 := b.NewValue0(v.Pos, OpConst32, typ.Int32)
+		v2.AuxInt = int32ToAuxInt(int32(read32(scon, 0, config.ctxt.Arch.ByteOrder)))
+		v0.AddArg2(v1, v2)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem)
+	// cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8
+	// result: (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+	for {
+		if len(v.Args) != 4 {
+			break
+		}
+		callAux := auxToCall(v.Aux)
+		mem := v.Args[3]
+		sptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpAddr {
+			break
+		}
+		scon := auxToSym(v_1.Aux)
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpSB {
+			break
+		}
+		v_2 := v.Args[2]
+		if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 8 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8) {
+			break
+		}
+		v.reset(OpMakeResult)
+		v0 := b.NewValue0(v.Pos, OpEq64, typ.Bool)
+		v1 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
+		v1.AddArg2(sptr, mem)
+		v2 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
+		v2.AuxInt = int64ToAuxInt(int64(read64(scon, 0, config.ctxt.Arch.ByteOrder)))
+		v0.AddArg2(v1, v2)
+		v.AddArg2(v0, mem)
+		return true
+	}
 	return false
 }
 func rewriteValuegeneric_OpStore(v *Value) bool {
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index 17dcd94ae1..35a181f83b 100644
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -538,3 +538,65 @@ func CmpToOneU_ex2(a uint8, b uint16, c uint32, d uint64) int {
 	}
 	return 0
 }
+
+// Check that small memequals are replaced with eq instructions
+
+func equalConstString1() bool {
+	a := string("A")
+	b := string("Z")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a == b
+}
+
+func equalVarString1(a string) bool {
+	b := string("Z")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a[:1] == b
+}
+
+func equalConstString2() bool {
+	a := string("AA")
+	b := string("ZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a == b
+}
+
+func equalVarString2(a string) bool {
+	b := string("ZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a[:2] == b
+}
+
+func equalConstString4() bool {
+	a := string("AAAA")
+	b := string("ZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a == b
+}
+
+func equalVarString4(a string) bool {
+	b := string("ZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a[:4] == b
+}
+
+func equalConstString8() bool {
+	a := string("AAAAAAAA")
+	b := string("ZZZZZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a == b
+}
+
+func equalVarString8(a string) bool {
+	b := string("ZZZZZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	return a[:8] == b
+}
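A quick informal way to confirm the effect outside the codegen test harness
(an editorial suggestion, not part of the CL; equal.go is a placeholder file
name): compile a file containing one of the functions above with the assembly
dump enabled and look for the call, e.g.

	go build -gcflags=-S equal.go 2>&1 | grep memequal

With the new rules in place, the 1-, 2-, 4- and 8-byte variants should produce
no matches on amd64 and arm64, matching the codegen checks added above.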