From f2ee58b6bb3d8312dad2ed7826c1a0e67aea8483 Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 13 Aug 2020 20:43:39 -0400 Subject: [PATCH] cmd/compile: using new calls, optimize runtime.memequal(x,constant,1) Proof of concept; also an actual optimization that fires 180 times in the Go source base. Change-Id: I5cb87474be764264cde6e4cbcb471ef109306f08 Reviewed-on: https://go-review.googlesource.com/c/go/+/248404 Trust: David Chase Run-TryBot: David Chase TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- .../compile/internal/ssa/gen/generic.rules | 14 +++- .../compile/internal/ssa/gen/genericOps.go | 5 +- src/cmd/compile/internal/ssa/opGen.go | 6 ++ .../compile/internal/ssa/rewritegeneric.go | 70 +++++++++++++++++++ 4 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index de0ef9349d..81568b7b7a 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -2001,7 +2001,12 @@ && warnRule(fe.Debug_checknil(), v, "removed nil check") => (Invalid) -// for late-expanded calls +// for rewriting results of some late-expanded rewrites (below) +(SelectN [0] (MakeResult a ___)) => a +(SelectN [1] (MakeResult a b ___)) => b +(SelectN [2] (MakeResult a b c ___)) => c + +// for late-expanded calls, recognize newobject and remove zeroing and nilchecks (Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call)) && isSameCall(call.Aux, "runtime.newobject") => mem @@ -2026,6 +2031,13 @@ && warnRule(fe.Debug_checknil(), v, "removed nil check") => (Invalid) +// for late-expanded calls, recognize memequal applied to a single constant byte +// TODO figure out breakeven number of bytes for this optimization. +(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem) + && isSameCall(callAux, "runtime.memequal") + && symIsRO(scon) + => (MakeResult (Eq8 (Load sptr mem) (Const8 [int8(read8(scon,0))])) mem) + // Evaluate constant address comparisons. (EqPtr x x) => (ConstBool [true]) (NeqPtr x x) => (ConstBool [false]) diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 23bd4af2cd..db8d7ba0cf 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -538,8 +538,9 @@ var genericOps = []opData{ // pseudo-ops for breaking Tuple {name: "Select0", argLength: 1, zeroWidth: true}, // the first component of a tuple {name: "Select1", argLength: 1, zeroWidth: true}, // the second component of a tuple - {name: "SelectN", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the auxint'th member. - {name: "SelectNAddr", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the address of auxint'th member. Used for un-SSA-able result types. + {name: "SelectN", argLength: 1, aux: "Int64"}, // arg0=result, auxint=field index. Returns the auxint'th member. + {name: "SelectNAddr", argLength: 1, aux: "Int64"}, // arg0=result, auxint=field index. Returns the address of auxint'th member. Used for un-SSA-able result types. + {name: "MakeResult", argLength: -1}, // arg0 .. are components of a "Result" (like the result from a Call). The last arg should be memory (like the result from a call). // Atomic operations used for semantically inlining sync/atomic and // runtime/internal/atomic. Atomic loads return a new memory so that diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 96aa3adedd..25c1df12ee 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2855,6 +2855,7 @@ const ( OpSelect1 OpSelectN OpSelectNAddr + OpMakeResult OpAtomicLoad8 OpAtomicLoad32 OpAtomicLoad64 @@ -35724,6 +35725,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "MakeResult", + argLen: -1, + generic: true, + }, { name: "AtomicLoad8", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index c9b8f70424..4cb9a8f328 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -394,6 +394,8 @@ func rewriteValuegeneric(v *Value) bool { return rewriteValuegeneric_OpSqrt(v) case OpStaticCall: return rewriteValuegeneric_OpStaticCall(v) + case OpStaticLECall: + return rewriteValuegeneric_OpStaticLECall(v) case OpStore: return rewriteValuegeneric_OpStore(v) case OpStringLen: @@ -20767,6 +20769,36 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (SelectN [0] (MakeResult a ___)) + // result: a + for { + if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpMakeResult || len(v_0.Args) < 1 { + break + } + a := v_0.Args[0] + v.copyOf(a) + return true + } + // match: (SelectN [1] (MakeResult a b ___)) + // result: b + for { + if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpMakeResult || len(v_0.Args) < 2 { + break + } + b := v_0.Args[1] + v.copyOf(b) + return true + } + // match: (SelectN [2] (MakeResult a b c ___)) + // result: c + for { + if auxIntToInt64(v.AuxInt) != 2 || v_0.Op != OpMakeResult || len(v_0.Args) < 3 { + break + } + c := v_0.Args[2] + v.copyOf(c) + return true + } // match: (SelectN [0] call:(StaticLECall {sym} dst src (Const64 [sz]) mem)) // cond: sz >= 0 && call.Uses == 1 && isSameCall(sym, "runtime.memmove") && dst.Type.IsPtr() && isInlinableMemmove(dst, src, int64(sz), config) && clobber(call) // result: (Move {dst.Type.Elem()} [int64(sz)] dst src mem) @@ -21367,6 +21399,44 @@ func rewriteValuegeneric_OpStaticCall(v *Value) bool { } return false } +func rewriteValuegeneric_OpStaticLECall(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem) + // cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) + // result: (MakeResult (Eq8 (Load sptr mem) (Const8 [int8(read8(scon,0))])) mem) + for { + if len(v.Args) != 4 { + break + } + callAux := auxToCall(v.Aux) + mem := v.Args[3] + sptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAddr { + break + } + scon := auxToSym(v_1.Aux) + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpSB { + break + } + v_2 := v.Args[2] + if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 1 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon)) { + break + } + v.reset(OpMakeResult) + v0 := b.NewValue0(v.Pos, OpEq8, typ.Bool) + v1 := b.NewValue0(v.Pos, OpLoad, typ.Int8) + v1.AddArg2(sptr, mem) + v2 := b.NewValue0(v.Pos, OpConst8, typ.Int8) + v2.AuxInt = int8ToAuxInt(int8(read8(scon, 0))) + v0.AddArg2(v1, v2) + v.AddArg2(v0, mem) + return true + } + return false +} func rewriteValuegeneric_OpStore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] -- 2.48.1