From cb702a2a5670d4fa599f573b37b1a01abc9f995a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Wed, 13 Apr 2022 17:33:24 -0700 Subject: [PATCH] cmd/compile: fold constant shifts into (SHL|SHR|SAR)Xload ops We should prefer a constant shift op to a X shift op. That way we don't have to materialize the constant to shift by. Should fix GOAMD64=v3 builder Change-Id: I56b45d2940c959382b970e3f962ed4a09cc2a239 Reviewed-on: https://go-review.googlesource.com/c/go/+/400254 Reviewed-by: Cherry Mui Reviewed-by: Wayne Zuo Run-TryBot: Dmitri Shuralyov TryBot-Result: Gopher Robot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/gen/AMD64.rules | 4 + src/cmd/compile/internal/ssa/rewriteAMD64.go | 240 +++++++++++++++++++ 2 files changed, 244 insertions(+) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 1bee810fbf..1fd36bfc88 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -2301,3 +2301,7 @@ (SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem) (SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) (SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) + +((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) +((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) +((SHL|SHR|SAR)XLload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Lconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index f5ec7dc003..67ccc99679 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -384,8 +384,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64SARWconst(v) case OpAMD64SARXL: return rewriteValueAMD64_OpAMD64SARXL(v) + case OpAMD64SARXLload: + return rewriteValueAMD64_OpAMD64SARXLload(v) case OpAMD64SARXQ: return rewriteValueAMD64_OpAMD64SARXQ(v) + case OpAMD64SARXQload: + return rewriteValueAMD64_OpAMD64SARXQload(v) case OpAMD64SBBLcarrymask: return rewriteValueAMD64_OpAMD64SBBLcarrymask(v) case OpAMD64SBBQ: @@ -444,8 +448,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64SHLQconst(v) case OpAMD64SHLXL: return rewriteValueAMD64_OpAMD64SHLXL(v) + case OpAMD64SHLXLload: + return rewriteValueAMD64_OpAMD64SHLXLload(v) case OpAMD64SHLXQ: return rewriteValueAMD64_OpAMD64SHLXQ(v) + case OpAMD64SHLXQload: + return rewriteValueAMD64_OpAMD64SHLXQload(v) case OpAMD64SHRB: return rewriteValueAMD64_OpAMD64SHRB(v) case OpAMD64SHRBconst: @@ -464,8 +472,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64SHRWconst(v) case OpAMD64SHRXL: return rewriteValueAMD64_OpAMD64SHRXL(v) + case OpAMD64SHRXLload: + return rewriteValueAMD64_OpAMD64SHRXLload(v) case OpAMD64SHRXQ: return rewriteValueAMD64_OpAMD64SHRXQ(v) + case OpAMD64SHRXQload: + return rewriteValueAMD64_OpAMD64SHRXQload(v) case OpAMD64SUBL: return rewriteValueAMD64_OpAMD64SUBL(v) case OpAMD64SUBLconst: @@ -21625,6 +21637,34 @@ func rewriteValueAMD64_OpAMD64SARXL(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SARXLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SARXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SARLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SARLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -21843,6 +21883,54 @@ func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SARXQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SARXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SARQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SARQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + // match: (SARXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SARQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SARQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool { v_0 := v.Args[0] // match: (SBBLcarrymask (FlagEQ)) @@ -26913,6 +27001,34 @@ func rewriteValueAMD64_OpAMD64SHLXL(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHLXLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHLXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHLLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SHLXQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -27131,6 +27247,54 @@ func rewriteValueAMD64_OpAMD64SHLXQ(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHLXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SHLQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + // match: (SHLXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHLQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SHRB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -27985,6 +28149,34 @@ func rewriteValueAMD64_OpAMD64SHRXL(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHRXLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHRXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHRLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SHRXQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28203,6 +28395,54 @@ func rewriteValueAMD64_OpAMD64SHRXQ(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHRXQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHRXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + // match: (SHRXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SUBL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] -- 2.50.0