From: Wayne Zuo
Date: Fri, 8 Apr 2022 08:44:13 +0000 (+0800)
Subject: cmd/compile: add SARX instruction for GOAMD64>=3
X-Git-Tag: go1.19beta1~691
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d6320f1a58f1f7820daee06a086c83a0274a777f;p=gostls13.git

cmd/compile: add SARX instruction for GOAMD64>=3

name                      old time/op  new time/op  delta
ShiftArithmeticRight-8    0.68ns ± 5%  0.30ns ± 6%  -56.14%  (p=0.000 n=10+10)

Change-Id: I052a0d7b9e6526d526276444e588b0cc288beff4
Reviewed-on: https://go-review.googlesource.com/c/go/+/399055
Run-TryBot: Wayne Zuo
TryBot-Result: Gopher Robot
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
Reviewed-by: Keith Randall
Reviewed-by: Cherry Mui
---

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 2dae55ba86..9fde775358 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -282,6 +282,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Reg = v.Reg()
 		p.SetFrom3Reg(v.Args[1].Reg())
 
+	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
+		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
+		p.SetFrom3Reg(v.Args[0].Reg())
+
 	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
 		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
 		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
diff --git a/src/cmd/compile/internal/amd64/versions_test.go b/src/cmd/compile/internal/amd64/versions_test.go
index 11b4d8436a..248f07067f 100644
--- a/src/cmd/compile/internal/amd64/versions_test.go
+++ b/src/cmd/compile/internal/amd64/versions_test.go
@@ -239,6 +239,7 @@ var featureToOpcodes = map[string][]string{
 	// native objdump doesn't include [QL] on linux.
 	"popcnt": {"popcntq", "popcntl", "popcnt"},
 	"bmi1":   {"andnq", "andnl", "andn", "blsiq", "blsil", "blsi", "blsmskq", "blsmskl", "blsmsk", "blsrq", "blsrl", "blsr", "tzcntq", "tzcntl", "tzcnt"},
+	"bmi2":   {"sarxq", "sarxl", "sarx", "shlxq", "shlxl", "shlx", "shrxq", "shrxl", "shrx"},
 	"sse41":  {"roundsd"},
 	"fma":    {"vfmadd231sd"},
 	"movbe":  {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index d50bdf2a17..3a9de8dd03 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -206,6 +206,9 @@
 (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
 (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
 
+// Prefer the SARX instruction because it has fewer register restrictions on the shift input.
+(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
+
 // Lowering integer comparisons
 (Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
 (Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
@@ -803,28 +806,29 @@
 (SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
 (SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
 (SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
-
+(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
+(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
 // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
-((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ y))
-((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ y))
-
-((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ y))
-((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ y))
-
-((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL y))
-((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL y))
-
-((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL y))
-((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL y))
+((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ y))
+((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ y))
+
+((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
+((SHLL|SHRL|SARL|SARXL) x (NEGQ (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ y))
+((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
+((SHLL|SHRL|SARL|SARXL) x (NEGQ (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ y))
+
+((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL y))
+((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL y))
+
+((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
+((SHLL|SHRL|SARL|SARXL) x (NEGL (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL y))
+((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
+((SHLL|SHRL|SARL|SARXL) x (NEGL (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL y))
 
 // Constant rotate instructions
 ((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index ab84504d1a..2eec6e0324 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -953,6 +953,9 @@ func init() {
 		{name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
 
 		// CPUID feature: BMI2.
+		{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
+		{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
+
 		{name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
 		{name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
 		{name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 1c941e84e1..976d887321 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1062,6 +1062,8 @@ const (
 	OpAMD64MOVBELstoreidx8
 	OpAMD64MOVBEQstoreidx1
 	OpAMD64MOVBEQstoreidx8
+	OpAMD64SARXQ
+	OpAMD64SARXL
 	OpAMD64SHLXLload
 	OpAMD64SHLXQload
 	OpAMD64SHRXLload
@@ -14117,6 +14119,34 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "SARXQ",
+		argLen: 2,
+		asm:    x86.ASARXQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+				{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+			outputs: []outputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+		},
+	},
+	{
+		name:   "SARXL",
+		argLen: 2,
+		asm:    x86.ASARXL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+				{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+			outputs: []outputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+		},
+	},
 	{
 		name:    "SHLXLload",
 		auxType: auxSymOff,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index addfaaa3a8..81f1f1ae4e 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -382,6 +382,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64SARW(v)
 	case OpAMD64SARWconst:
 		return rewriteValueAMD64_OpAMD64SARWconst(v)
+	case OpAMD64SARXL:
+		return rewriteValueAMD64_OpAMD64SARXL(v)
+	case OpAMD64SARXQ:
+		return rewriteValueAMD64_OpAMD64SARXQ(v)
 	case OpAMD64SBBLcarrymask:
 		return rewriteValueAMD64_OpAMD64SBBLcarrymask(v)
 	case OpAMD64SBBQ:
@@ -19844,6 +19848,19 @@ func rewriteValueAMD64_OpAMD64SARL(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	// match: (SARL x y)
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (SARXL x y)
+	for {
+		x := v_0
+		y := v_1
+		if !(buildcfg.GOAMD64 >= 3) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SARL x (MOVQconst [c]))
 	// result: (SARLconst [int8(c&31)] x)
 	for {
@@ -20066,6 +20083,19 @@ func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	// match: (SARQ x y)
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (SARXQ x y)
+	for {
+		x := v_0
+		y := v_1
+		if !(buildcfg.GOAMD64 >= 3) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SARQ x (MOVQconst [c]))
 	// result: (SARQconst [int8(c&63)] x)
 	for {
@@ -20341,6 +20371,398 @@ func rewriteValueAMD64_OpAMD64SARWconst(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64SARXL(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (SARXL x (MOVQconst [c]))
+	// result: (SARLconst [int8(c&31)] x)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64MOVQconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		v.reset(OpAMD64SARLconst)
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SARXL x (MOVLconst [c]))
+	// result: (SARLconst [int8(c&31)] x)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64MOVLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		v.reset(OpAMD64SARLconst)
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SARXL x (ADDQconst [c] y))
+	// cond: c & 31 == 0
+	// result: (SARXL x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ADDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&31 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXL x (NEGQ (ADDQconst [c] y)))
+	// cond: c & 31 == 0
+	// result: (SARXL x (NEGQ y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGQ {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&31 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXL x (ANDQconst [c] y))
+	// cond: c & 31 == 31
+	// result: (SARXL x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ANDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&31 == 31) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXL x (NEGQ (ANDQconst [c] y)))
+	// cond: c & 31 == 31
+	// result: (SARXL x (NEGQ y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGQ {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ANDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&31 == 31) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXL x (ADDLconst [c] y))
+	// cond: c & 31 == 0
+	// result: (SARXL x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ADDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&31 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXL x (NEGL (ADDLconst [c] y)))
+	// cond: c & 31 == 0
+	// result: (SARXL x (NEGL y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGL {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ADDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&31 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXL x (ANDLconst [c] y))
+	// cond: c & 31 == 31
+	// result: (SARXL x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ANDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&31 == 31) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXL x (NEGL (ANDLconst [c] y)))
+	// cond: c & 31 == 31
+	// result: (SARXL x (NEGL y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGL {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ANDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&31 == 31) {
+			break
+		}
+		v.reset(OpAMD64SARXL)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (SARXQ x (MOVQconst [c]))
+	// result: (SARQconst [int8(c&63)] x)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64MOVQconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		v.reset(OpAMD64SARQconst)
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SARXQ x (MOVLconst [c]))
+	// result: (SARQconst [int8(c&63)] x)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64MOVLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		v.reset(OpAMD64SARQconst)
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SARXQ x (ADDQconst [c] y))
+	// cond: c & 63 == 0
+	// result: (SARXQ x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ADDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&63 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXQ x (NEGQ (ADDQconst [c] y)))
+	// cond: c & 63 == 0
+	// result: (SARXQ x (NEGQ y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGQ {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&63 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXQ x (ANDQconst [c] y))
+	// cond: c & 63 == 63
+	// result: (SARXQ x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ANDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&63 == 63) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXQ x (NEGQ (ANDQconst [c] y)))
+	// cond: c & 63 == 63
+	// result: (SARXQ x (NEGQ y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGQ {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ANDQconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&63 == 63) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXQ x (ADDLconst [c] y))
+	// cond: c & 63 == 0
+	// result: (SARXQ x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ADDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&63 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXQ x (NEGL (ADDLconst [c] y)))
+	// cond: c & 63 == 0
+	// result: (SARXQ x (NEGL y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGL {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ADDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&63 == 0) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	// match: (SARXQ x (ANDLconst [c] y))
+	// cond: c & 63 == 63
+	// result: (SARXQ x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64ANDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1.AuxInt)
+		y := v_1.Args[0]
+		if !(c&63 == 63) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v.AddArg2(x, y)
+		return true
+	}
+	// match: (SARXQ x (NEGL (ANDLconst [c] y)))
+	// cond: c & 63 == 63
+	// result: (SARXQ x (NEGL y))
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64NEGL {
+			break
+		}
+		t := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64ANDLconst {
+			break
+		}
+		c := auxIntToInt32(v_1_0.AuxInt)
+		y := v_1_0.Args[0]
+		if !(c&63 == 63) {
+			break
+		}
+		v.reset(OpAMD64SARXQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+		v0.AddArg(y)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (SBBLcarrymask (FlagEQ))
diff --git a/src/cmd/compile/internal/test/shift_test.go b/src/cmd/compile/internal/test/shift_test.go
index ea88f0a70a..58c8dde1a0 100644
--- a/src/cmd/compile/internal/test/shift_test.go
+++ b/src/cmd/compile/internal/test/shift_test.go
@@ -1029,3 +1029,13 @@ func TestShiftGeneric(t *testing.T) {
 		}
 	}
 }
+
+var shiftSink64 int64
+
+func BenchmarkShiftArithmeticRight(b *testing.B) {
+	x := shiftSink64
+	for i := 0; i < b.N; i++ {
+		x = x >> (i & 63)
+	}
+	shiftSink64 = x
+}
diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go
index 2908d1b796..9dd2b0039c 100644
--- a/test/codegen/bmi.go
+++ b/test/codegen/bmi.go
@@ -46,6 +46,16 @@ func blsr32(x int32) int32 {
 	return x & (x - 1)
 }
 
+func sarx64(x, y int64) int64 {
+	// amd64/v3:"SARXQ"
+	return x >> y
+}
+
+func sarx32(x, y int32) int32 {
+	// amd64/v3:"SARXL"
+	return x >> y
+}
+
 func shlrx64(x []uint64, i int, s uint64) uint64 {
 	// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
 	s = x[i] >> i
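
To observe the new lowering locally, the following minimal sketch may help; it is not part of the commit, and the file and function names are hypothetical. It mirrors the sarx64 codegen test above: masking the shift count proves the shift is in bounds (shiftIsBounded), so it lowers to a plain SARQ, and with a toolchain containing this change built at GOAMD64=v3 the new (SARQ x y) => (SARXQ x y) rule should then select the BMI2 form.

	// sarxdemo.go — a hypothetical demo, assuming a toolchain with this change.
	// Inspect the generated code with:
	//
	//	GOAMD64=v3 go build -gcflags=-S sarxdemo.go 2>&1 | grep SARX
	package main

	import "fmt"

	//go:noinline
	func sar64(x, y int64) int64 {
		// The mask keeps the shift amount in [0, 63], so no bounds
		// checks are emitted and the whole expression should compile
		// to a single arithmetic right shift (SARXQ at GOAMD64>=3).
		return x >> (y & 63)
	}

	func main() {
		fmt.Println(sar64(-1024, 3)) // prints -128
	}

On a pre-v3 target the same function uses the classic SARQ, whose shift count must live in CX; SARX encodes the count as an ordinary register operand, which is what the rules-file comment means by fewer register restrictions and what the benchmark improvement reflects.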