From 66f03f79dadc6005d30a6edf4419b8f6c0fa6398 Mon Sep 17 00:00:00 2001
From: Wayne Zuo
Date: Sat, 9 Apr 2022 14:40:40 +0800
Subject: [PATCH] cmd/compile: add SHLX&SHRX without load

Change-Id: I79eb5e7d6bcb23f26d3a100e915efff6dae70391
Reviewed-on: https://go-review.googlesource.com/c/go/+/399061
Reviewed-by: Keith Randall
Reviewed-by: Keith Randall
Reviewed-by: Cherry Mui
---
 src/cmd/compile/internal/amd64/ssa.go        |    4 +-
 src/cmd/compile/internal/ssa/gen/AMD64.rules |   88 +-
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |    4 +
 src/cmd/compile/internal/ssa/opGen.go        |   60 +
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 2422 +++++++++++++++++-
 test/codegen/bmi.go                          |   20 +-
 6 files changed, 2484 insertions(+), 114 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 1ec8623320..98f90748d6 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -282,7 +282,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Reg = v.Reg()
 		p.SetFrom3Reg(v.Args[1].Reg())
 
-	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
+	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
+		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
+		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
 		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
 		p.SetFrom3Reg(v.Args[0].Reg())
 
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 2ffdea3d55..1bee810fbf 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -206,8 +206,10 @@
 (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
 (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
 
-// Prefer SARX instruction because it has less register restriction on the shift input.
+// Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input.
 (SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
+(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
+(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
 
 // Lowering integer comparisons
 (Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
@@ -593,6 +595,8 @@
 // mutandis, for UGE and SETAE, and CC and SETCC.
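// Editor's illustration, not part of the CL: Go source such as
//
//	func isSet(x uint64, b uint) bool { return x&(1<<b) != 0 }
//
// lowers to a TEST against a one shifted by b, which is the
// (TEST(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) b) x) shape matched below. The new
// SHLXL/SHLXQ variants keep this BT conversion firing after GOAMD64 >= 3 has
// rewritten the shift to its BMI2 form.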
 ((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
 ((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
+((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
+((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
 ((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
     => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
 ((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -601,6 +605,8 @@
     => ((ULT|UGE) (BTQconst [int8(log64(c))] x))
 (SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
 (SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
+(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
+(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
 (SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
     => (SET(B|AE) (BTLconst [int8(log32(c))] x))
 (SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -612,6 +618,10 @@
     => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
     => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
     => (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
@@ -624,9 +634,10 @@
 (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
 (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
 (BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
+(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
 (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
 (BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
-(BTLconst [0] s:(SHRL x y)) => (BTL y x)
+(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
 
 // Rewrite a & 1 != 1 into a & 1 == 0.
 // Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
@@ -638,6 +649,8 @@
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
+(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
+(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
 
 // Convert ORconst into BTS, if the code gets smaller, with boundary being
 // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
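// Editor's illustration with invented names, not from the CL: the bit
// setting/toggling shapes above arise from source like
//
//	func set(x uint64, b uint) uint64  { return x | 1<<b }
//	func flip(x uint64, b uint) uint64 { return x ^ 1<<b }
//
// which fold to a single BTSQ/BTCQ. The SHLX(Q|L) rules added here let the
// same folding apply once SHL has been lowered to SHLX under GOAMD64 >= 3.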
@@ -653,6 +666,8 @@
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
 (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
+(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
 (ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
     => (BTRQconst [int8(log32(^c))] x)
 (ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
@@ -794,6 +809,8 @@
 (SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
 (SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
+(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
+(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
 
 (SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
 (SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
@@ -801,6 +818,8 @@
 (SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
 (SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
 (SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
+(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
+(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
 
 (SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
 (SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
@@ -810,25 +829,25 @@
 (SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
 
 // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
-((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ y))
-((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ y))
-
-((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGQ (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ y))
-((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGQ (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ y))
-
-((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL y))
-((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL y))
-
-((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGL (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL y))
-((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGL (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL y))
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ y))
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x 
(NEGQ y)) + +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ y)) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ y)) + +((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) +((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL y)) +((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) +((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL y)) + +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL y)) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) +((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL y)) // Constant rotate instructions ((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c]) @@ -860,9 +879,13 @@ // it in order to strip it out. (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y) (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y) +(ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y) +(ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y) (ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y) (ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y) +(ORL (SHLXL x y) (ANDL (SHRXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y) +(ORL (SHRXL x y) (ANDL (SHLXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y) // Help with rotate detection (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT) @@ -877,6 +900,15 @@ (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))) && v.Type.Size() == 2 => (RORW x y) +(ORL (SHLXL x (AND(Q|L)const y [15])) + (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))) + (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16])))) + && v.Type.Size() == 2 + => (ROLW x y) +(ORL (SHRW x (AND(Q|L)const y [15])) + (SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))) + && v.Type.Size() == 2 + => (RORW x y) (ORL (SHLL 
x (AND(Q|L)const y [ 7])) (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))) @@ -887,6 +919,15 @@ (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))) && v.Type.Size() == 1 => (RORB x y) +(ORL (SHLXL x (AND(Q|L)const y [ 7])) + (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))) + (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8])))) + && v.Type.Size() == 1 + => (ROLB x y) +(ORL (SHRB x (AND(Q|L)const y [ 7])) + (SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))) + && v.Type.Size() == 1 + => (RORB x y) // rotate left negative = rotate right (ROLQ x (NEG(Q|L) y)) => (RORQ x y) @@ -920,6 +961,7 @@ // Multi-register shifts (ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits) +(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits) // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) // because the x86 instructions are defined to use all 5 bits of the shift even @@ -2257,5 +2299,5 @@ => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem) (SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem) -(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) -(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) +(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) +(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 23c157c2c5..becee876df 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -955,6 +955,10 @@ func init() { // CPUID feature: BMI2. 
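// Editor's note, background rather than CL text: SHLX/SHRX, like SARX, are
// VEX-encoded three-operand BMI2 shifts. They take the shift count in any
// general-purpose register instead of requiring CX, and they do not write
// the flags, which is why the rules earlier in this patch prefer them when
// buildcfg.GOAMD64 >= 3. For example, a bounded shift such as
//
//	func shl(x uint64, n uint) uint64 { return x << (n & 63) }
//
// can compile to a single SHLXQ: shiftIsBounded removes the wraparound
// check, SHLQ is rewritten to SHLXQ, and the &63 mask is folded away by the
// (SHLXQ x (ANDQconst [c] y)) && c & 63 == 63 rule.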
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64 {name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32 + {name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64 + {name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32 + {name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64 + {name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32 {name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32 {name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c66c8d33d4..db52b53a28 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1064,6 +1064,10 @@ const ( OpAMD64MOVBEQstoreidx8 OpAMD64SARXQ OpAMD64SARXL + OpAMD64SHLXQ + OpAMD64SHLXL + OpAMD64SHRXQ + OpAMD64SHRXL OpAMD64SARXLload OpAMD64SARXQload OpAMD64SHLXLload @@ -14154,6 +14158,62 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SHLXQ", + argLen: 2, + asm: x86.ASHLXQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "SHLXL", + argLen: 2, + asm: x86.ASHLXL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "SHRXQ", + argLen: 2, + asm: x86.ASHRXQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "SHRXL", + argLen: 2, + asm: x86.ASHRXL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, { name: "SARXLload", auxType: auxSymOff, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index ecea8f0962..f5ec7dc003 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -442,6 +442,10 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64SHLQ(v) case OpAMD64SHLQconst: return rewriteValueAMD64_OpAMD64SHLQconst(v) + case OpAMD64SHLXL: + return rewriteValueAMD64_OpAMD64SHLXL(v) + case OpAMD64SHLXQ: + return rewriteValueAMD64_OpAMD64SHLXQ(v) case OpAMD64SHRB: return rewriteValueAMD64_OpAMD64SHRB(v) case OpAMD64SHRBconst: @@ -458,6 +462,10 @@ func rewriteValueAMD64(v *Value) 
bool { return rewriteValueAMD64_OpAMD64SHRW(v) case OpAMD64SHRWconst: return rewriteValueAMD64_OpAMD64SHRWconst(v) + case OpAMD64SHRXL: + return rewriteValueAMD64_OpAMD64SHRXL(v) + case OpAMD64SHRXQ: + return rewriteValueAMD64_OpAMD64SHRXQ(v) case OpAMD64SUBL: return rewriteValueAMD64_OpAMD64SUBL(v) case OpAMD64SUBLconst: @@ -2704,6 +2712,29 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool { } break } + // match: (ANDL (NOTL (SHLXL (MOVLconst [1]) y)) x) + // result: (BTRL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64NOTL { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SHLXL { + continue + } + y := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTRL) + v.AddArg2(x, y) + return true + } + break + } // match: (ANDL (MOVLconst [c]) x) // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 // result: (BTRLconst [int8(log32(^c))] x) @@ -3121,6 +3152,22 @@ func rewriteValueAMD64_OpAMD64ANDNL(v *Value) bool { v.AddArg2(x, y) return true } + // match: (ANDNL x (SHLXL (MOVLconst [1]) y)) + // result: (BTRL x y) + for { + x := v_0 + if v_1.Op != OpAMD64SHLXL { + break + } + y := v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0.AuxInt) != 1 { + break + } + v.reset(OpAMD64BTRL) + v.AddArg2(x, y) + return true + } return false } func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool { @@ -3142,6 +3189,22 @@ func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool { v.AddArg2(x, y) return true } + // match: (ANDNQ x (SHLXQ (MOVQconst [1]) y)) + // result: (BTRQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64SHLXQ { + break + } + y := v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0.AuxInt) != 1 { + break + } + v.reset(OpAMD64BTRQ) + v.AddArg2(x, y) + return true + } return false } func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool { @@ -3170,6 +3233,29 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool { } break } + // match: (ANDQ (NOTQ (SHLXQ (MOVQconst [1]) y)) x) + // result: (BTRQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64NOTQ { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SHLXQ { + continue + } + y := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTRQ) + v.AddArg2(x, y) + return true + } + break + } // match: (ANDQ (MOVQconst [c]) x) // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 // result: (BTRQconst [int8(log64(^c))] x) @@ -3873,6 +3959,22 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool { v.AddArg2(y, x) return true } + // match: (BTLconst [0] s:(SHRXQ x y)) + // result: (BTQ y x) + for { + if auxIntToInt8(v.AuxInt) != 0 { + break + } + s := v_0 + if s.Op != OpAMD64SHRXQ { + break + } + y := s.Args[1] + x := s.Args[0] + v.reset(OpAMD64BTQ) + v.AddArg2(y, x) + return true + } // match: (BTLconst [c] (SHRLconst [d] x)) // cond: (c+d)<32 // result: (BTLconst [c+d] x) @@ -3925,6 +4027,22 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool { v.AddArg2(y, x) return true } + // match: (BTLconst [0] s:(SHRXL x y)) + // result: (BTL y x) + for { + if auxIntToInt8(v.AuxInt) != 0 { + break + } + s := v_0 + if s.Op != OpAMD64SHRXL { + break + } + y := s.Args[1] + x := s.Args[0] + v.reset(OpAMD64BTL) + v.AddArg2(y, x) + return true + } return false 
} func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool { @@ -3981,6 +4099,22 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool { v.AddArg2(y, x) return true } + // match: (BTQconst [0] s:(SHRXQ x y)) + // result: (BTQ y x) + for { + if auxIntToInt8(v.AuxInt) != 0 { + break + } + s := v_0 + if s.Op != OpAMD64SHRXQ { + break + } + y := s.Args[1] + x := s.Args[0] + v.reset(OpAMD64BTQ) + v.AddArg2(y, x) + return true + } return false } func rewriteValueAMD64_OpAMD64BTRLconst(v *Value) bool { @@ -15890,6 +16024,25 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } + // match: (ORL (SHLXL (MOVLconst [1]) y) x) + // result: (BTSL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTSL) + v.AddArg2(x, y) + return true + } + break + } // match: (ORL (MOVLconst [c]) x) // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 // result: (BTSLconst [int8(log32(c))] x) @@ -16200,6 +16353,206 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } + // match: (ORL (SHLXL x y) (ANDL (SHRXL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])))) + // result: (ROLL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRXL { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64ROLL) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHLXL x y) (ANDL (SHRXL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])))) + // result: (ROLL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRXL { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != 
-32 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64ROLL) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHRXL x y) (ANDL (SHLXL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])))) + // result: (RORL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXL { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXL { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64RORL) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHRXL x y) (ANDL (SHLXL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])))) + // result: (RORL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXL { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXL { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64RORL) + v.AddArg2(x, y) + return true + } + } + break + } // match: (ORL (SHLL x (ANDQconst y [15])) (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16])))) // cond: v.Type.Size() == 2 // result: (ROLW x y) @@ -16408,6 +16761,214 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } + // match: (ORL (SHLXL x (ANDQconst y [15])) (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16])))) + // cond: v.Type.Size() == 2 + // result: (ROLW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + 
continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 15 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRW { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ { + continue + } + v_1_0_1_0 := v_1_0_1.Args[0] + if v_1_0_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -16 { + continue + } + v_1_0_1_0_0 := v_1_0_1_0.Args[0] + if v_1_0_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 15 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 16 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -16 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 15 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 2) { + continue + } + v.reset(OpAMD64ROLW) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHLXL x (ANDLconst y [15])) (ANDL (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16])))) + // cond: v.Type.Size() == 2 + // result: (ROLW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 15 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRW { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL { + continue + } + v_1_0_1_0 := v_1_0_1.Args[0] + if v_1_0_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -16 { + continue + } + v_1_0_1_0_0 := v_1_0_1_0.Args[0] + if v_1_0_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 15 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 16 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -16 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 15 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 2) { + continue + } + v.reset(OpAMD64ROLW) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHRW x (ANDQconst y [15])) (SHLXL x (NEGQ (ADDQconst (ANDQconst y [15]) [-16])))) + // cond: v.Type.Size() == 2 + // result: (RORW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRW { + continue + 
} + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 15 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64SHLXL { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) { + continue + } + v.reset(OpAMD64RORW) + v.AddArg2(x, y) + return true + } + break + } + // match: (ORL (SHRW x (ANDLconst y [15])) (SHLXL x (NEGL (ADDLconst (ANDLconst y [15]) [-16])))) + // cond: v.Type.Size() == 2 + // result: (RORW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRW { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 15 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64SHLXL { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGL { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) { + continue + } + v.reset(OpAMD64RORW) + v.AddArg2(x, y) + return true + } + break + } // match: (ORL (SHLL x (ANDQconst y [ 7])) (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8])))) // cond: v.Type.Size() == 1 // result: (ROLB x y) @@ -16616,6 +17177,214 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } + // match: (ORL (SHLXL x (ANDQconst y [ 7])) (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8])))) + // cond: v.Type.Size() == 1 + // result: (ROLB x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRB { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ { + continue + } + v_1_0_1_0 := v_1_0_1.Args[0] + if v_1_0_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 { + continue + } + v_1_0_1_0_0 := v_1_0_1_0.Args[0] + if v_1_0_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 { + continue + } + 
v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) { + continue + } + v.reset(OpAMD64ROLB) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHLXL x (ANDLconst y [ 7])) (ANDL (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8])))) + // cond: v.Type.Size() == 1 + // result: (ROLB x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64ANDL { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRB { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL { + continue + } + v_1_0_1_0 := v_1_0_1.Args[0] + if v_1_0_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 { + continue + } + v_1_0_1_0_0 := v_1_0_1_0.Args[0] + if v_1_0_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) { + continue + } + v.reset(OpAMD64ROLB) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORL (SHRB x (ANDQconst y [ 7])) (SHLXL x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])))) + // cond: v.Type.Size() == 1 + // result: (RORB x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRB { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64SHLXL { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) { + continue + } + v.reset(OpAMD64RORB) + v.AddArg2(x, y) + return true + } + break + } + // match: (ORL (SHRB x (ANDLconst y [ 7])) (SHLXL x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])))) + // cond: v.Type.Size() == 1 + // result: (RORB x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRB { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpAMD64SHLXL { + continue + } + _ = v_1.Args[1] + if x 
!= v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGL { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) { + continue + } + v.reset(OpAMD64RORB) + v.AddArg2(x, y) + return true + } + break + } // match: (ORL x x) // result: x for { @@ -17509,6 +18278,25 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } + // match: (ORQ (SHLXQ (MOVQconst [1]) y) x) + // result: (BTSQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTSQ) + v.AddArg2(x, y) + return true + } + break + } // match: (ORQ (MOVQconst [c]) x) // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128 // result: (BTSQconst [int8(log64(c))] x) @@ -17789,6 +18577,206 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } + // match: (ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])))) + // result: (ROLQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDQ { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRXQ { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64ROLQ) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])))) + // result: (ROLQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDQ { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHRXQ { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != 
-64 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64ROLQ) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])))) + // result: (RORQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXQ { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDQ { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXQ { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGQ { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64RORQ) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])))) + // result: (RORQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXQ { + continue + } + y := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpAMD64ANDQ { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXQ { + continue + } + _ = v_1_0.Args[1] + if x != v_1_0.Args[0] { + continue + } + v_1_0_1 := v_1_0.Args[1] + if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 { + continue + } + v_1_1_0_0 := v_1_1_0.Args[0] + if v_1_1_0_0.Op != OpAMD64NEGL { + continue + } + v_1_1_0_0_0 := v_1_1_0_0.Args[0] + if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 { + continue + } + v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0] + if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] { + continue + } + v.reset(OpAMD64RORQ) + v.AddArg2(x, y) + return true + } + } + break + } // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits))) // result: (SHRDQ lo hi bits) for { @@ -17837,6 +18825,54 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } + // match: (ORQ (SHRXQ lo bits) (SHLXQ hi (NEGQ bits))) + // result: (SHRDQ lo hi bits) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXQ { + continue + } + bits := v_0.Args[1] + lo := v_0.Args[0] + if v_1.Op != OpAMD64SHLXQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHRDQ) + v.AddArg3(lo, hi, bits) + return true + } 
+ break + } + // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits))) + // result: (SHLDQ lo hi bits) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue + } + bits := v_0.Args[1] + lo := v_0.Args[0] + if v_1.Op != OpAMD64SHRXQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHLDQ) + v.AddArg3(lo, hi, bits) + return true + } + break + } // match: (ORQ (MOVQconst [c]) (MOVQconst [d])) // result: (MOVQconst [c|d]) for { @@ -22062,6 +23098,60 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool { } break } + // match: (SETEQ (TESTL (SHLXL (MOVLconst [1]) x) y)) + // result: (SETAE (BTL x y)) + for { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (SETEQ (TESTQ (SHLXQ (MOVQconst [1]) x) y)) + // result: (SETAE (BTQ x y)) + for { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } // match: (SETEQ (TESTLconst [c] x)) // cond: isUint32PowerOfTwo(int64(c)) // result: (SETAE (BTLconst [int8(log32(c))] x)) @@ -22487,6 +23577,72 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool { } break } + // match: (SETEQstore [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem) + // result: (SETAEstore [off] {sym} ptr (BTL x y) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXL { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break + } + // match: (SETEQstore [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem) + // result: (SETAEstore [off] {sym} ptr (BTQ x y) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXQ { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + 
v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break + } // match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem) // cond: isUint32PowerOfTwo(int64(c)) // result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem) @@ -23978,6 +25134,60 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool { } break } + // match: (SETNE (TESTL (SHLXL (MOVLconst [1]) x) y)) + // result: (SETB (BTL x y)) + for { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (SETNE (TESTQ (SHLXQ (MOVQconst [1]) x) y)) + // result: (SETB (BTQ x y)) + for { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } // match: (SETNE (TESTLconst [c] x)) // cond: isUint32PowerOfTwo(int64(c)) // result: (SETB (BTLconst [int8(log32(c))] x)) @@ -24403,6 +25613,72 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool { } break } + // match: (SETNEstore [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem) + // result: (SETBstore [off] {sym} ptr (BTL x y) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXL { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break + } + // match: (SETNEstore [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem) + // result: (SETBstore [off] {sym} ptr (BTQ x y) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLXQ { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break + } // match: 
(SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem) // cond: isUint32PowerOfTwo(int64(c)) // result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem) @@ -24917,6 +26193,19 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (SHLL x y) + // cond: buildcfg.GOAMD64 >= 3 + // result: (SHLXL x y) + for { + x := v_0 + y := v_1 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64SHLXL) + v.AddArg2(x, y) + return true + } // match: (SHLL x (MOVQconst [c])) // result: (SHLLconst [int8(c&31)] x) for { @@ -25107,28 +26396,6 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { v.AddArg2(x, v0) return true } - // match: (SHLL l:(MOVLload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHLXLload [off] {sym} ptr x mem) - for { - l := v_0 - if l.Op != OpAMD64MOVLload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64SHLXLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } return false } func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { @@ -25173,6 +26440,19 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (SHLQ x y) + // cond: buildcfg.GOAMD64 >= 3 + // result: (SHLXQ x y) + for { + x := v_0 + y := v_1 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64SHLXQ) + v.AddArg2(x, y) + return true + } // match: (SHLQ x (MOVQconst [c])) // result: (SHLQconst [int8(c&63)] x) for { @@ -25363,28 +26643,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { v.AddArg2(x, v0) return true } - // match: (SHLQ l:(MOVQload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHLXQload [off] {sym} ptr x mem) - for { - l := v_0 - if l.Op != OpAMD64MOVQload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64SHLXQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } return false } func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { @@ -25437,6 +26695,442 @@ func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHLXL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHLXL x (MOVQconst [c])) + // result: (SHLLconst [int8(c&31)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true + } + // match: (SHLXL x (MOVLconst [c])) + // result: (SHLLconst [int8(c&31)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true + } + // match: (SHLXL x (ADDQconst [c] y)) + // cond: c & 31 == 0 + // result: (SHLXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHLXL) + v.AddArg2(x, y) + return true + } + // match: (SHLXL x (NEGQ (ADDQconst [c] y))) 
+ // cond: c & 31 == 0 + // result: (SHLXL x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHLXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXL x (ANDQconst [c] y)) + // cond: c & 31 == 31 + // result: (SHLXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHLXL) + v.AddArg2(x, y) + return true + } + // match: (SHLXL x (NEGQ (ANDQconst [c] y))) + // cond: c & 31 == 31 + // result: (SHLXL x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHLXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXL x (ADDLconst [c] y)) + // cond: c & 31 == 0 + // result: (SHLXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHLXL) + v.AddArg2(x, y) + return true + } + // match: (SHLXL x (NEGL (ADDLconst [c] y))) + // cond: c & 31 == 0 + // result: (SHLXL x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHLXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXL x (ANDLconst [c] y)) + // cond: c & 31 == 31 + // result: (SHLXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHLXL) + v.AddArg2(x, y) + return true + } + // match: (SHLXL x (NEGL (ANDLconst [c] y))) + // cond: c & 31 == 31 + // result: (SHLXL x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHLXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXL l:(MOVLload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (SHLXLload [off] {sym} ptr x mem) + for { + l := v_0 + if l.Op != OpAMD64MOVLload { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHLXLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64SHLXQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHLXQ x (MOVQconst [c])) + // result: (SHLQconst [int8(c&63)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + 
v.AddArg(x) + return true + } + // match: (SHLXQ x (MOVLconst [c])) + // result: (SHLQconst [int8(c&63)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) + return true + } + // match: (SHLXQ x (ADDQconst [c] y)) + // cond: c & 63 == 0 + // result: (SHLXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLXQ) + v.AddArg2(x, y) + return true + } + // match: (SHLXQ x (NEGQ (ADDQconst [c] y))) + // cond: c & 63 == 0 + // result: (SHLXQ x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXQ x (ANDQconst [c] y)) + // cond: c & 63 == 63 + // result: (SHLXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLXQ) + v.AddArg2(x, y) + return true + } + // match: (SHLXQ x (NEGQ (ANDQconst [c] y))) + // cond: c & 63 == 63 + // result: (SHLXQ x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXQ x (ADDLconst [c] y)) + // cond: c & 63 == 0 + // result: (SHLXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLXQ) + v.AddArg2(x, y) + return true + } + // match: (SHLXQ x (NEGL (ADDLconst [c] y))) + // cond: c & 63 == 0 + // result: (SHLXQ x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXQ x (ANDLconst [c] y)) + // cond: c & 63 == 63 + // result: (SHLXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLXQ) + v.AddArg2(x, y) + return true + } + // match: (SHLXQ x (NEGL (ANDLconst [c] y))) + // cond: c & 63 == 63 + // result: (SHLXQ x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHLXQ l:(MOVQload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (SHLXQload [off] {sym} ptr x mem) + for { + l := v_0 + if l.Op != OpAMD64MOVQload { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHLXQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SHRB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -25524,6 +27218,19 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (SHRL x y) + // cond: buildcfg.GOAMD64 >= 3 + // result: (SHRXL x y) + for { + x := v_0 + y := v_1 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64SHRXL) + v.AddArg2(x, y) + return true + } // match: (SHRL x (MOVQconst [c])) // result: (SHRLconst [int8(c&31)] x) for { @@ -25714,28 +27421,6 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool { v.AddArg2(x, v0) return true } - // match: (SHRL l:(MOVLload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHRXLload [off] {sym} ptr x mem) - for { - l := v_0 - if l.Op != OpAMD64MOVLload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64SHRXLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } return false } func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool { @@ -25768,6 +27453,19 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (SHRQ x y) + // cond: buildcfg.GOAMD64 >= 3 + // result: (SHRXQ x y) + for { + x := v_0 + y := v_1 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64SHRXQ) + v.AddArg2(x, y) + return true + } // match: (SHRQ x (MOVQconst [c])) // result: (SHRQconst [int8(c&63)] x) for { @@ -25958,28 +27656,6 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool { v.AddArg2(x, v0) return true } - // match: (SHRQ l:(MOVQload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHRXQload [off] {sym} ptr x mem) - for { - l := v_0 - if l.Op != OpAMD64MOVQload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64SHRXQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } return false } func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool { @@ -26091,6 +27767,442 @@ func rewriteValueAMD64_OpAMD64SHRWconst(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SHRXL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHRXL x (MOVQconst [c])) + // result: (SHRLconst [int8(c&31)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHRLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true + } + // match: (SHRXL x (MOVLconst [c])) + // result: (SHRLconst [int8(c&31)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHRLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true + } + // match: (SHRXL x (ADDQconst [c] y)) + // cond: c & 31 == 0 + // result: (SHRXL x y) + for { + x := v_0 + if v_1.Op != 
OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRXL) + v.AddArg2(x, y) + return true + } + // match: (SHRXL x (NEGQ (ADDQconst [c] y))) + // cond: c & 31 == 0 + // result: (SHRXL x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXL x (ANDQconst [c] y)) + // cond: c & 31 == 31 + // result: (SHRXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRXL) + v.AddArg2(x, y) + return true + } + // match: (SHRXL x (NEGQ (ANDQconst [c] y))) + // cond: c & 31 == 31 + // result: (SHRXL x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXL x (ADDLconst [c] y)) + // cond: c & 31 == 0 + // result: (SHRXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRXL) + v.AddArg2(x, y) + return true + } + // match: (SHRXL x (NEGL (ADDLconst [c] y))) + // cond: c & 31 == 0 + // result: (SHRXL x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXL x (ANDLconst [c] y)) + // cond: c & 31 == 31 + // result: (SHRXL x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRXL) + v.AddArg2(x, y) + return true + } + // match: (SHRXL x (NEGL (ANDLconst [c] y))) + // cond: c & 31 == 31 + // result: (SHRXL x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRXL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXL l:(MOVLload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (SHRXLload [off] {sym} ptr x mem) + for { + l := v_0 + if l.Op != OpAMD64MOVLload { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHRXLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64SHRXQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHRXQ x (MOVQconst 
[c])) + // result: (SHRQconst [int8(c&63)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) + return true + } + // match: (SHRXQ x (MOVLconst [c])) + // result: (SHRQconst [int8(c&63)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) + return true + } + // match: (SHRXQ x (ADDQconst [c] y)) + // cond: c & 63 == 0 + // result: (SHRXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHRXQ) + v.AddArg2(x, y) + return true + } + // match: (SHRXQ x (NEGQ (ADDQconst [c] y))) + // cond: c & 63 == 0 + // result: (SHRXQ x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHRXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXQ x (ANDQconst [c] y)) + // cond: c & 63 == 63 + // result: (SHRXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHRXQ) + v.AddArg2(x, y) + return true + } + // match: (SHRXQ x (NEGQ (ANDQconst [c] y))) + // cond: c & 63 == 63 + // result: (SHRXQ x (NEGQ y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHRXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXQ x (ADDLconst [c] y)) + // cond: c & 63 == 0 + // result: (SHRXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHRXQ) + v.AddArg2(x, y) + return true + } + // match: (SHRXQ x (NEGL (ADDLconst [c] y))) + // cond: c & 63 == 0 + // result: (SHRXQ x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHRXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: (SHRXQ x (ANDLconst [c] y)) + // cond: c & 63 == 63 + // result: (SHRXQ x y) + for { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHRXQ) + v.AddArg2(x, y) + return true + } + // match: (SHRXQ x (NEGL (ANDLconst [c] y))) + // cond: c & 63 == 63 + // result: (SHRXQ x (NEGL y)) + for { + x := v_0 + if v_1.Op != OpAMD64NEGL { + break + } + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHRXQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + // match: 
(SHRXQ l:(MOVQload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (SHRXQload [off] {sym} ptr x mem) + for { + l := v_0 + if l.Op != OpAMD64MOVQload { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHRXQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SUBL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -27374,6 +29486,25 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool { } break } + // match: (XORL (SHLXL (MOVLconst [1]) y) x) + // result: (BTCL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXL { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTCL) + v.AddArg2(x, y) + return true + } + break + } // match: (XORL (MOVLconst [c]) x) // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 // result: (BTCLconst [int8(log32(c))] x) @@ -27911,6 +30042,25 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { } break } + // match: (XORQ (SHLXQ (MOVQconst [1]) y) x) + // result: (BTCQ x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTCQ) + v.AddArg2(x, y) + return true + } + break + } // match: (XORQ (MOVQconst [c]) x) // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128 // result: (BTCQconst [int8(log64(c))] x) @@ -34533,6 +36683,54 @@ func rewriteBlockAMD64(b *Block) bool { } break } + // match: (EQ (TESTL (SHLXL (MOVLconst [1]) x) y)) + // result: (UGE (BTL x y)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTQ (SHLXQ (MOVQconst [1]) x) y)) + // result: (UGE (BTQ x y)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } // match: (EQ (TESTLconst [c] x)) // cond: isUint32PowerOfTwo(int64(c)) // result: (UGE (BTLconst [int8(log32(c))] x)) @@ -35336,6 +37534,54 @@ func rewriteBlockAMD64(b *Block) bool { } break } + // match: (NE (TESTL (SHLXL (MOVLconst [1]) x) y)) + // result: (ULT (BTL x y)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 
:= 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTQ (SHLXQ (MOVQconst [1]) x) y)) + // result: (ULT (BTQ x y)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLXQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } // match: (NE (TESTLconst [c] x)) // cond: isUint32PowerOfTwo(int64(c)) // result: (ULT (BTLconst [int8(log32(c))] x)) diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go index 1641d5ddd0..3b125a1b59 100644 --- a/test/codegen/bmi.go +++ b/test/codegen/bmi.go @@ -72,7 +72,23 @@ func sarx32_load(x []int32, i int) int32 { return s } -func shlrx64(x []uint64, i int, s uint64) uint64 { +func shlrx64(x, y uint64) uint64 { + // amd64/v3:"SHRXQ" + s := x >> y + // amd64/v3:"SHLXQ" + s = s << y + return s +} + +func shlrx32(x, y uint32) uint32 { + // amd64/v3:"SHRXL" + s := x >> y + // amd64/v3:"SHLXL" + s = s << y + return s +} + +func shlrx64_load(x []uint64, i int, s uint64) uint64 { // amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` s = x[i] >> i // amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` @@ -80,7 +96,7 @@ func shlrx64(x []uint64, i int, s uint64) uint64 { return s } -func shlrx32(x []uint32, i int, s uint32) uint32 { +func shlrx32_load(x []uint32, i int, s uint32) uint32 { // amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` s = x[i] >> i // amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` -- 2.48.1
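
For illustration (not part of the patch itself): a minimal standalone sketch of the shift patterns the new lowering rules target, mirroring the updated test/codegen/bmi.go checks. Assuming a GOAMD64=v3 build on a BMI2-capable CPU, the compiler is expected to select SHRXQ/SHLXQ here instead of SHRQ/SHLQ, since the VEX-encoded forms accept the shift count in any general-purpose register rather than requiring CX.

    package main

    import "fmt"

    // shifts mirrors the patterns exercised by the codegen tests above:
    // on amd64/v3 the right shift should lower to SHRXQ and the left
    // shift to SHLXQ via the new (SHR(Q|L) x y) / (SHL(Q|L) x y) rules.
    func shifts(x, y uint64) (uint64, uint64) {
    	r := x >> y // expected: SHRXQ on amd64/v3
    	l := x << y // expected: SHLXQ on amd64/v3
    	return r, l
    }

    func main() {
    	fmt.Println(shifts(0x1234, 8))
    }

The emitted assembly can be inspected with, for example, GOAMD64=v3 go build -gcflags=-S .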