Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add SHLX&SHRX without load
author    Wayne Zuo <wdvxdr@golangcn.org>
          Sat, 9 Apr 2022 06:40:40 +0000 (14:40 +0800)
committer Keith Randall <khr@golang.org>
          Wed, 13 Apr 2022 17:48:36 +0000 (17:48 +0000)
Change-Id: I79eb5e7d6bcb23f26d3a100e915efff6dae70391
Reviewed-on: https://go-review.googlesource.com/c/go/+/399061
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bmi.go

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 1ec86233209f1dce9e0cc1e6bf7032cb1424f33b..98f90748d6e9000283c599459c81c5ffe8b445ba 100644
@@ -282,7 +282,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Reg = v.Reg()
                p.SetFrom3Reg(v.Args[1].Reg())
 
-       case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
+       case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
+               ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
+               ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
                p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
                p.SetFrom3Reg(v.Args[0].Reg())
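
The new case emits the three-operand BMI2 form: destination, source, and
shift count are all independent general registers, whereas plain SHL/SHR
hard-wire the count to CX. As a rough sketch of the resulting assembly
(operand order as in Go's assembler, count first; the registers are
illustrative, not from the patch):

    SHLXQ CX, AX, DX    // DX = AX << (CX & 63)
    SHRXQ BX, SI, DI    // DI = SI >> (BX & 63)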
 
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 2ffdea3d559c5d7bab675aa99e72c68f9fe3b318..1bee810fbf30ed0635a4c7f80969765f3558824c 100644
 (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
 (Rsh8x(64|32|16|8) x y)  && shiftIsBounded(v) => (SARB x y)
 
-// Prefer SARX instruction because it has less register restriction on the shift input.
+// Prefer SARX/SHLX/SHRX instructions because they have fewer register restrictions on the shift input.
 (SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
+(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
+(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
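
With these rules, a variable shift on a GOAMD64=v3 (or later) target lowers
to the BMI2 form. A minimal sketch in the spirit of test/codegen/bmi.go
(function names are illustrative, not from the patch):

    func shl64(x uint64, n uint) uint64 {
        return x << (n & 63) // bounded shift; can lower to SHLXQ under GOAMD64=v3
    }

    func shr64(x uint64, n uint) uint64 {
        return x >> (n & 63) // likewise SHRXQ instead of SHRQ
    }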
 
 // Lowering integer comparisons
 (Less(64|32|16|8)      x y) => (SETL  (CMP(Q|L|W|B)     x y))
 // mutandis, for UGE and SETAE, and CC and SETCC.
 ((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
 ((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
+((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
+((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
 ((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
     => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
 ((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
     => ((ULT|UGE) (BTQconst [int8(log64(c))] x))
 (SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE)  (BTL x y))
 (SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE)  (BTQ x y))
+(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE)  (BTL x y))
+(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE)  (BTQ x y))
 (SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
     => (SET(B|AE)  (BTLconst [int8(log32(c))] x))
 (SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
     => (SET(B|AE)store  [off] {sym} ptr (BTL x y) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
     => (SET(B|AE)store  [off] {sym} ptr (BTQ x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
+    => (SET(B|AE)store  [off] {sym} ptr (BTL x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
+    => (SET(B|AE)store  [off] {sym} ptr (BTQ x y) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
     => (SET(B|AE)store  [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
 (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
 (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d      => (BT(Q|L)const [c-d] x)
 (BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
+(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
 (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
 (BTLconst [c] (SHLLconst [d] x)) && c>d      => (BTLconst [c-d] x)
-(BTLconst [0] s:(SHRL x y)) => (BTL y x)
+(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
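
These additions keep bit-test recognition working once 1<<n has been lowered
to SHLXL/SHLXQ, so the usual source pattern still compiles to a single BT.
An illustrative sketch (not from the patch):

    func isSet(x uint64, n uint) bool {
        return x&(1<<(n&63)) != 0 // matched to (ULT (BTQ n x)) by the rules above
    }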
 
 // Rewrite a & 1 != 1 into a & 1 == 0.
 // Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
+(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
+(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
 
 // Convert ORconst into BTS, if the code gets smaller, with boundary being
 // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
 (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
+(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
 (ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
     => (BTRQconst [int8(log32(^c))] x)
 (ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
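
The same extension covers bit set, toggle, and clear: when 1<<b was lowered
to SHLX, the OR/XOR/AND-NOT combinations still collapse into BTS, BTC, and
BTR. Illustrative source shapes (a sketch, not from the patch):

    func setBit(a uint64, b uint) uint64    { return a | 1<<(b&63) }      // BTSQ
    func toggleBit(a uint64, b uint) uint64 { return a ^ 1<<(b&63) }      // BTCQ
    func clearBit(a uint64, b uint) uint64  { return a &^ (1 << (b & 63)) } // BTRQ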
 
 (SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
 (SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
+(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
+(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
 
 (SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
 (SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
 (SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
 (SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
 (SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
+(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
+(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
 
 (SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
 (SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
 (SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
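
A SHLX/SHRX with a constant count buys nothing over the classic immediate
encodings, so these rules fold it back to the const forms; the BMI2 ops only
survive for genuinely variable counts. For example (a hedged sketch):

    func shrConst(x uint64) uint64 {
        return x >> 5 // SHRQconst $5 regardless of GOAMD64 level; never SHRXQ
    }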
 
 // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
-((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
-((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
-
-((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
-((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
-
-((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
-((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
-((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
-
-((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
-((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
-((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
+
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
+
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
+((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
+
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
+((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
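
Since the hardware already reduces the count mod 64 (or 32 for the L forms),
adds and masks that cannot change those low bits are stripped from the count
expression of the X ops as well. An illustrative sketch:

    func shiftWrapped(x uint64, n uint) uint64 {
        // both the +64 and the &63 are NOPs on the count and drop away
        return x << ((n + 64) & 63)
    }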
 
 // Constant rotate instructions
 ((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
 // it in order to strip it out.
 (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
 (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
+(ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
+(ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
 
 (ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
 (ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
+(ORL (SHLXL x y) (ANDL (SHRXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
+(ORL (SHRXL x y) (ANDL (SHLXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
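
These long patterns are what the generic rotate expansion looks like once
the shifts have been lowered to SHLX/SHRX; the rules fold them back into a
single rotate. At the source level the shape comes from code like this
(a sketch; s being an unbounded uint is what produces the carry-mask guard):

    func rotl64(x uint64, s uint) uint64 {
        return x<<s | x>>(64-s) // recognized and rewritten to ROLQ x, s
    }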
 
 // Help with rotate detection
 (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
      (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
   && v.Type.Size() == 2
   => (RORW x y)
+(ORL (SHLXL x (AND(Q|L)const y [15]))
+     (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
+           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
+  && v.Type.Size() == 2
+  => (ROLW x y)
+(ORL (SHRW x (AND(Q|L)const y [15]))
+     (SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
+  && v.Type.Size() == 2
+  => (RORW x y)
 
 (ORL (SHLL x (AND(Q|L)const y [ 7]))
      (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
      (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
   && v.Type.Size() == 1
   => (RORB x y)
+(ORL (SHLXL x (AND(Q|L)const y [ 7]))
+     (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
+           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
+  && v.Type.Size() == 1
+  => (ROLB x y)
+(ORL (SHRB x (AND(Q|L)const y [ 7]))
+     (SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
+  && v.Type.Size() == 1
+  => (RORB x y)
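
The word- and byte-sized rotates get the same treatment for the SHLXL form.
A sketch of the recognized source shape (assumed, in the style of the
codegen rotate tests):

    func rotl16(x uint16, s uint) uint16 {
        return x<<(s&15) | x>>(16-s&15) // matched to ROLW
    }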
 
 // rotate left negative = rotate right
 (ROLQ x (NEG(Q|L) y)) => (RORQ x y)
 
 // Multi-register shifts
 (ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
+(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
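
The funnel-shift fusion is restated for the X forms too, so two-word shifts
keep becoming a single SHRDQ/SHLDQ. Roughly, this is the low word of a
128-bit right shift (a sketch, assuming the compiler sees s bounded in
(0, 64)):

    func shrd(lo, hi uint64, s uint) uint64 {
        return lo>>s | hi<<(64-s) // can fuse into SHRDQ lo, hi, s
    }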
 
 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
 // because the x86 instructions are defined to use all 5 bits of the shift even
   => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
 
 (SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
-(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)  => (SHLX(Q|L)load [off] {sym} ptr x mem)
-(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)  => (SHRX(Q|L)load [off] {sym} ptr x mem)
+(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
+(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
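
With SHL/SHR now rewritten to SHLX/SHRX up front on GOAMD64 >= 3, the
load-folding rules are restated against the X ops: a shift whose source
operand is a memory load can merge into a single SHLXload/SHRXload.
Illustrative sketch:

    func shiftFromMem(p *uint64, n uint) uint64 {
        return *p << (n & 63) // load and shift can fuse into SHLXQload
    }
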
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 23c157c2c56af0e78d36bc2ad62389ec3c800e4d..becee876dfcdaaabdb43415cb5cab7fe1967ec72 100644
@@ -955,6 +955,10 @@ func init() {
                // CPUID feature: BMI2.
                {name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
                {name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
+               {name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64
+               {name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32
+               {name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64
+               {name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32
 
                {name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
                {name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index c66c8d33d4a03473a7ce3b8b8821e769ebc4c322..db52b53a28f1967ec98aff56008f95d240320b10 100644
@@ -1064,6 +1064,10 @@ const (
        OpAMD64MOVBEQstoreidx8
        OpAMD64SARXQ
        OpAMD64SARXL
+       OpAMD64SHLXQ
+       OpAMD64SHLXL
+       OpAMD64SHRXQ
+       OpAMD64SHRXL
        OpAMD64SARXLload
        OpAMD64SARXQload
        OpAMD64SHLXLload
@@ -14154,6 +14158,62 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "SHLXQ",
+               argLen: 2,
+               asm:    x86.ASHLXQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:   "SHLXL",
+               argLen: 2,
+               asm:    x86.ASHLXL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:   "SHRXQ",
+               argLen: 2,
+               asm:    x86.ASHRXQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:   "SHRXL",
+               argLen: 2,
+               asm:    x86.ASHRXL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:           "SARXLload",
                auxType:        auxSymOff,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index ecea8f09623e850ce8e831ecfd75a27995a8aef7..f5ec7dc00375305c47abaac139d332a649dcbc4e 100644
@@ -442,6 +442,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SHLQ(v)
        case OpAMD64SHLQconst:
                return rewriteValueAMD64_OpAMD64SHLQconst(v)
+       case OpAMD64SHLXL:
+               return rewriteValueAMD64_OpAMD64SHLXL(v)
+       case OpAMD64SHLXQ:
+               return rewriteValueAMD64_OpAMD64SHLXQ(v)
        case OpAMD64SHRB:
                return rewriteValueAMD64_OpAMD64SHRB(v)
        case OpAMD64SHRBconst:
@@ -458,6 +462,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SHRW(v)
        case OpAMD64SHRWconst:
                return rewriteValueAMD64_OpAMD64SHRWconst(v)
+       case OpAMD64SHRXL:
+               return rewriteValueAMD64_OpAMD64SHRXL(v)
+       case OpAMD64SHRXQ:
+               return rewriteValueAMD64_OpAMD64SHRXQ(v)
        case OpAMD64SUBL:
                return rewriteValueAMD64_OpAMD64SUBL(v)
        case OpAMD64SUBLconst:
@@ -2704,6 +2712,29 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
                }
                break
        }
+       // match: (ANDL (NOTL (SHLXL (MOVLconst [1]) y)) x)
+       // result: (BTRL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64NOTL {
+                               continue
+                       }
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       y := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTRL)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (ANDL (MOVLconst [c]) x)
        // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
        // result: (BTRLconst [int8(log32(^c))] x)
@@ -3121,6 +3152,22 @@ func rewriteValueAMD64_OpAMD64ANDNL(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (ANDNL x (SHLXL (MOVLconst [1]) y))
+       // result: (BTRL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64SHLXL {
+                       break
+               }
+               y := v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0.AuxInt) != 1 {
+                       break
+               }
+               v.reset(OpAMD64BTRL)
+               v.AddArg2(x, y)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool {
@@ -3142,6 +3189,22 @@ func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (ANDNQ x (SHLXQ (MOVQconst [1]) y))
+       // result: (BTRQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64SHLXQ {
+                       break
+               }
+               y := v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0.AuxInt) != 1 {
+                       break
+               }
+               v.reset(OpAMD64BTRQ)
+               v.AddArg2(x, y)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
@@ -3170,6 +3233,29 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
                }
                break
        }
+       // match: (ANDQ (NOTQ (SHLXQ (MOVQconst [1]) y)) x)
+       // result: (BTRQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64NOTQ {
+                               continue
+                       }
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       y := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTRQ)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (ANDQ (MOVQconst [c]) x)
        // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
        // result: (BTRQconst [int8(log64(^c))] x)
@@ -3873,6 +3959,22 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
                v.AddArg2(y, x)
                return true
        }
+       // match: (BTLconst [0] s:(SHRXQ x y))
+       // result: (BTQ y x)
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               s := v_0
+               if s.Op != OpAMD64SHRXQ {
+                       break
+               }
+               y := s.Args[1]
+               x := s.Args[0]
+               v.reset(OpAMD64BTQ)
+               v.AddArg2(y, x)
+               return true
+       }
        // match: (BTLconst [c] (SHRLconst [d] x))
        // cond: (c+d)<32
        // result: (BTLconst [c+d] x)
@@ -3925,6 +4027,22 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
                v.AddArg2(y, x)
                return true
        }
+       // match: (BTLconst [0] s:(SHRXL x y))
+       // result: (BTL y x)
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               s := v_0
+               if s.Op != OpAMD64SHRXL {
+                       break
+               }
+               y := s.Args[1]
+               x := s.Args[0]
+               v.reset(OpAMD64BTL)
+               v.AddArg2(y, x)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
@@ -3981,6 +4099,22 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
                v.AddArg2(y, x)
                return true
        }
+       // match: (BTQconst [0] s:(SHRXQ x y))
+       // result: (BTQ y x)
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               s := v_0
+               if s.Op != OpAMD64SHRXQ {
+                       break
+               }
+               y := s.Args[1]
+               x := s.Args[0]
+               v.reset(OpAMD64BTQ)
+               v.AddArg2(y, x)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64BTRLconst(v *Value) bool {
@@ -15890,6 +16024,25 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
+       // match: (ORL (SHLXL (MOVLconst [1]) y) x)
+       // result: (BTSL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTSL)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (ORL (MOVLconst [c]) x)
        // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
        // result: (BTSLconst [int8(log32(c))] x)
@@ -16200,6 +16353,206 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
+       // match: (ORL (SHLXL x y) (ANDL (SHRXL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
+       // result: (ROLL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRXL {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLL)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHLXL x y) (ANDL (SHRXL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // result: (ROLL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRXL {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLL)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHRXL x y) (ANDL (SHLXL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
+       // result: (RORL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHLXL {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64RORL)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHRXL x y) (ANDL (SHLXL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // result: (RORL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHLXL {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64RORL)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
        // match: (ORL (SHLL x (ANDQconst y [15])) (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16]))))
        // cond: v.Type.Size() == 2
        // result: (ROLW x y)
@@ -16408,6 +16761,214 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
+       // match: (ORL (SHLXL x (ANDQconst y [15])) (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16]))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRW {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_0_1_0 := v_1_0_1.Args[0]
+                               if v_1_0_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -16 {
+                                       continue
+                               }
+                               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+                               if v_1_0_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 15 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 16 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -16 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 15 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 2) {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLW)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHLXL x (ANDLconst y [15])) (ANDL (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16]))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRW {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_0_1_0 := v_1_0_1.Args[0]
+                               if v_1_0_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -16 {
+                                       continue
+                               }
+                               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+                               if v_1_0_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 15 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 16 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -16 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 15 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 2) {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLW)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHRW x (ANDQconst y [15])) (SHLXL x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRW {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       if x != v_1.Args[0] {
+                               continue
+                       }
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ {
+                               continue
+                       }
+                       v_1_1_0 := v_1_1.Args[0]
+                       if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 {
+                               continue
+                       }
+                       v_1_1_0_0 := v_1_1_0.Args[0]
+                       if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
+                               continue
+                       }
+                       v.reset(OpAMD64RORW)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (ORL (SHRW x (ANDLconst y [15])) (SHLXL x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRW {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       if x != v_1.Args[0] {
+                               continue
+                       }
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGL {
+                               continue
+                       }
+                       v_1_1_0 := v_1_1.Args[0]
+                       if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 {
+                               continue
+                       }
+                       v_1_1_0_0 := v_1_1_0.Args[0]
+                       if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
+                               continue
+                       }
+                       v.reset(OpAMD64RORW)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (ORL (SHLL x (ANDQconst y [ 7])) (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8]))))
        // cond: v.Type.Size() == 1
        // result: (ROLB x y)
@@ -16616,6 +17177,214 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
+       // match: (ORL (SHLXL x (ANDQconst y [ 7])) (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8]))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRB {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_0_1_0 := v_1_0_1.Args[0]
+                               if v_1_0_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 {
+                                       continue
+                               }
+                               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+                               if v_1_0_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLB)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHLXL x (ANDLconst y [ 7])) (ANDL (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8]))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64ANDL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRB {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_0_1_0 := v_1_0_1.Args[0]
+                               if v_1_0_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 {
+                                       continue
+                               }
+                               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+                               if v_1_0_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLB)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORL (SHRB x (ANDQconst y [ 7])) (SHLXL x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRB {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       if x != v_1.Args[0] {
+                               continue
+                       }
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ {
+                               continue
+                       }
+                       v_1_1_0 := v_1_1.Args[0]
+                       if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 {
+                               continue
+                       }
+                       v_1_1_0_0 := v_1_1_0.Args[0]
+                       if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
+                               continue
+                       }
+                       v.reset(OpAMD64RORB)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (ORL (SHRB x (ANDLconst y [ 7])) (SHLXL x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRB {
+                               continue
+                       }
+                       _ = v_0.Args[1]
+                       x := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
+                               continue
+                       }
+                       y := v_0_1.Args[0]
+                       if v_1.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       if x != v_1.Args[0] {
+                               continue
+                       }
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGL {
+                               continue
+                       }
+                       v_1_1_0 := v_1_1.Args[0]
+                       if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 {
+                               continue
+                       }
+                       v_1_1_0_0 := v_1_1_0.Args[0]
+                       if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
+                               continue
+                       }
+                       v.reset(OpAMD64RORB)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
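The two RORB matchers above are duplicates of the existing byte-rotate rules with SHLXL in place of SHLL: once GOAMD64 >= 3 rewrites every variable left shift to SHLXL, the rotate idiom reaches this function already spelled with the BMI2 op, and without these copies it would stop folding into a single rotate instruction. A minimal sketch of Go source that produces this shape (math/bits has no 8-bit rotate intrinsic, so the open-coded form below is what the matcher actually sees; illustrative only, not a codegen test):

        package main

        import "fmt"

        // rotr8 is the classic open-coded byte rotate. On amd64 with
        // GOAMD64>=3 the rules above should collapse it to one RORB.
        func rotr8(x uint8, k uint) uint8 {
                s := k & 7
                return x>>s | x<<(8-s)
        }

        func main() {
                fmt.Printf("%#02x\n", rotr8(0x96, 3)) // 0xd2
        }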
        // match: (ORL x x)
        // result: x
        for {
@@ -17509,6 +18278,25 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
+       // match: (ORQ (SHLXQ (MOVQconst [1]) y) x)
+       // result: (BTSQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTSQ)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
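This BTSQ rule restates the existing (ORQ (SHLQ (MOVQconst [1]) y) x) matcher for the SHLXQ spelling, so bit-set recognition keeps working after the eager SHLQ => SHLXQ rewrite. The Go shape it targets, roughly (a sketch; constant bit indices take the BTSQconst path instead):

        package bitops

        // setBit ORs in a single bit at a variable index. The &63 marks
        // the shift as bounded, so the compiler can emit one BTSQ
        // (hedged expectation, not a codegen test).
        func setBit(x uint64, i uint) uint64 {
                return x | 1<<(i&63)
        }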
        // match: (ORQ (MOVQconst [c]) x)
        // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
        // result: (BTSQconst [int8(log64(c))] x)
@@ -17789,6 +18577,206 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
+       // match: (ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))))
+       // result: (ROLQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRXQ {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLQ)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))))
+       // result: (ROLQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHRXQ {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64ROLQ)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))))
+       // result: (RORQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHLXQ {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGQ || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64RORQ)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
+       // match: (ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))))
+       // result: (RORQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if v_1.Op != OpAMD64ANDQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if v_1_0.Op != OpAMD64SHLXQ {
+                                       continue
+                               }
+                               _ = v_1_0.Args[1]
+                               if x != v_1_0.Args[0] {
+                                       continue
+                               }
+                               v_1_0_1 := v_1_0.Args[1]
+                               if v_1_0_1.Op != OpAMD64NEGL || y != v_1_0_1.Args[0] || v_1_1.Op != OpAMD64SBBQcarrymask {
+                                       continue
+                               }
+                               v_1_1_0 := v_1_1.Args[0]
+                               if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
+                                       continue
+                               }
+                               v_1_1_0_0 := v_1_1_0.Args[0]
+                               if v_1_1_0_0.Op != OpAMD64NEGL {
+                                       continue
+                               }
+                               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+                               if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
+                                       continue
+                               }
+                               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+                               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
+                                       continue
+                               }
+                               v.reset(OpAMD64RORQ)
+                               v.AddArg2(x, y)
+                               return true
+                       }
+               }
+               break
+       }
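These four blocks are the SHLXQ/SHRXQ editions of the 64-bit rotate matchers; the Q and L variants differ only in whether the shift count is computed in a 64-bit or 32-bit register. The SBBQcarrymask/CMPconst subtree is the bounds mask Go inserts for the 64-s shift, and the whole tree still has to collapse to ROLQ/RORQ now that the inner shifts arrive as BMI2 ops. A sketch of the source idiom (bits.RotateLeft64 is intrinsified on amd64 and takes a different path; the hand-written form is what these rules see):

        package rot

        // rotl64 should lower to a single ROLQ; the x>>(64-s) half is
        // where the NEGQ/ADDQconst/ANDQconst and carry-mask subtrees
        // come from (hedged sketch).
        func rotl64(x uint64, k uint) uint64 {
                s := k & 63
                return x<<s | x>>(64-s)
        }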
        // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits)))
        // result: (SHRDQ lo hi bits)
        for {
@@ -17837,6 +18825,54 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
+       // match: (ORQ (SHRXQ lo bits) (SHLXQ hi (NEGQ bits)))
+       // result: (SHRDQ lo hi bits)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHRXQ {
+                               continue
+                       }
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
+                               continue
+                       }
+                       v.reset(OpAMD64SHRDQ)
+                       v.AddArg3(lo, hi, bits)
+                       return true
+               }
+               break
+       }
+       // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits)))
+       // result: (SHLDQ lo hi bits)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHRXQ {
+                               continue
+                       }
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
+                               continue
+                       }
+                       v.reset(OpAMD64SHLDQ)
+                       v.AddArg3(lo, hi, bits)
+                       return true
+               }
+               break
+       }
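The funnel-shift matchers get the same duplication: an OR of SHRXQ on the low half and SHLXQ-by-NEG on the high half is still recognized as a double-width shift and fused into SHRDQ (or SHLDQ for the opposite direction). The kind of 128-bit helper involved, sketched under the assumption that both counts are provably in range:

        package wide

        // shr128 shifts the 128-bit value hi:lo right by s and returns
        // the low word. The &63 masks make both shift counts bounded,
        // -s&63 is folded through NEGQ, and the OR should fuse into one
        // SHRDQ (hedged sketch; meaningful for 1 <= s <= 63).
        func shr128(hi, lo uint64, s uint) uint64 {
                return lo>>(s&63) | hi<<(-s&63)
        }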
        // match: (ORQ (MOVQconst [c]) (MOVQconst [d]))
        // result: (MOVQconst [c|d])
        for {
@@ -22062,6 +23098,60 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                }
                break
        }
+       // match: (SETEQ (TESTL (SHLXL (MOVLconst [1]) x) y))
+       // result: (SETAE (BTL x y))
+       for {
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                       if v_0_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       x := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_0_1
+                       v.reset(OpAMD64SETAE)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg(v0)
+                       return true
+               }
+               break
+       }
+       // match: (SETEQ (TESTQ (SHLXQ (MOVQconst [1]) x) y))
+       // result: (SETAE (BTQ x y))
+       for {
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                       if v_0_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       x := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_0_1
+                       v.reset(OpAMD64SETAE)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg(v0)
+                       return true
+               }
+               break
+       }
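The SETEQ pair mirrors the existing TEST-of-shifted-1 rules: a variable bit test that now arrives as SHLXL/SHLXQ still lowers to a BT instruction plus SETAE (the tested bit lands in the carry flag, and equal-to-zero means carry clear). In source terms (sketch):

        package bittest

        // bitClear should compile to BTQ + SETAE rather than a shift,
        // AND and compare (hedged expectation for a variable index).
        func bitClear(x uint64, i uint) bool {
                return x&(1<<(i&63)) == 0
        }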
        // match: (SETEQ (TESTLconst [c] x))
        // cond: isUint32PowerOfTwo(int64(c))
        // result: (SETAE (BTLconst [int8(log32(c))] x))
@@ -22487,6 +23577,72 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
                }
                break
        }
+       // match: (SETEQstore [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
+       // result: (SETAEstore [off] {sym} ptr (BTL x y) mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
+                       if v_1_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       x := v_1_0.Args[1]
+                       v_1_0_0 := v_1_0.Args[0]
+                       if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_1_1
+                       mem := v_2
+                       v.reset(OpAMD64SETAEstore)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg3(ptr, v0, mem)
+                       return true
+               }
+               break
+       }
+       // match: (SETEQstore [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
+       // result: (SETAEstore [off] {sym} ptr (BTQ x y) mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
+                       if v_1_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       x := v_1_0.Args[1]
+                       v_1_0_0 := v_1_0.Args[0]
+                       if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_1_1
+                       mem := v_2
+                       v.reset(OpAMD64SETAEstore)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg3(ptr, v0, mem)
+                       return true
+               }
+               break
+       }
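The store forms fuse the flag write with the memory write: when the boolean result is assigned straight to memory, SETAEstore materializes the BT carry directly at [off] {sym} without a separate register-to-memory move. Sketch of the shape (assumption: illustrative only):

        package bittest

        // The comparison result is stored through dst, so the rewrite
        // can produce SETAEstore ptr (BTQ i x) mem in one step (hedged).
        func storeBitClear(dst *bool, x uint64, i uint) {
                *dst = x&(1<<(i&63)) == 0
        }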
        // match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem)
        // cond: isUint32PowerOfTwo(int64(c))
        // result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
@@ -23978,6 +25134,60 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                }
                break
        }
+       // match: (SETNE (TESTL (SHLXL (MOVLconst [1]) x) y))
+       // result: (SETB (BTL x y))
+       for {
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                       if v_0_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       x := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_0_1
+                       v.reset(OpAMD64SETB)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg(v0)
+                       return true
+               }
+               break
+       }
+       // match: (SETNE (TESTQ (SHLXQ (MOVQconst [1]) x) y))
+       // result: (SETB (BTQ x y))
+       for {
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                       if v_0_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       x := v_0_0.Args[1]
+                       v_0_0_0 := v_0_0.Args[0]
+                       if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_0_1
+                       v.reset(OpAMD64SETB)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg(v0)
+                       return true
+               }
+               break
+       }
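SETNE is the symmetric case: a set bit means carry set, so the same TEST-of-SHLX tree lowers to BT plus SETB. Sketch:

        package bittest

        // bitSet should become BTQ + SETB under the two rules above
        // (hedged expectation).
        func bitSet(x uint64, i uint) bool {
                return x&(1<<(i&63)) != 0
        }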
        // match: (SETNE (TESTLconst [c] x))
        // cond: isUint32PowerOfTwo(int64(c))
        // result: (SETB (BTLconst [int8(log32(c))] x))
@@ -24403,6 +25613,72 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
                }
                break
        }
+       // match: (SETNEstore [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
+       // result: (SETBstore [off] {sym} ptr (BTL x y) mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
+                       if v_1_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       x := v_1_0.Args[1]
+                       v_1_0_0 := v_1_0.Args[0]
+                       if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_1_1
+                       mem := v_2
+                       v.reset(OpAMD64SETBstore)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg3(ptr, v0, mem)
+                       return true
+               }
+               break
+       }
+       // match: (SETNEstore [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
+       // result: (SETBstore [off] {sym} ptr (BTQ x y) mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
+                       if v_1_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       x := v_1_0.Args[1]
+                       v_1_0_0 := v_1_0.Args[0]
+                       if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       y := v_1_1
+                       mem := v_2
+                       v.reset(OpAMD64SETBstore)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       v.AddArg3(ptr, v0, mem)
+                       return true
+               }
+               break
+       }
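As with the SETEQstore pair further up, these two blocks only restate the existing SETNEstore matchers with SHLXL/SHLXQ on the inside; the lowering to SETBstore plus BTL/BTQ is unchanged.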
        // match: (SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem)
        // cond: isUint32PowerOfTwo(int64(c))
        // result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
@@ -24917,6 +26193,19 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (SHLL x y)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (SHLXL x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v.AddArg2(x, y)
+               return true
+       }
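This rule is the heart of the CL: at GOAMD64 >= 3, every variable SHLL is rewritten to SHLXL unconditionally, rather than only when a load could be merged (the deleted block at the end of this function). SHLX is the BMI2 three-operand shift: it accepts the count in any general-purpose register instead of insisting on CL, and it does not write the flags, which gives the register allocator and scheduler more freedom. SHLQ, SHRL and SHRQ below get the identical one-line rule, and counts that turn out to be constants are folded back to SHLLconst by the new SHLXL rules, so constant shifts lose nothing. In the style of the checks this CL adds to test/codegen/bmi.go (paraphrased expectation, not the literal test):

        package codegen

        func shlx64(x uint64, s uint) uint64 {
                // amd64/v3: SHLXQ; amd64/v1: SHLQ (expected shapes)
                return x << (s & 63)
        }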
        // match: (SHLL x (MOVQconst [c]))
        // result: (SHLLconst [int8(c&31)] x)
        for {
@@ -25107,28 +26396,6 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       // match: (SHLL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
-       // result: (SHLXLload [off] {sym} ptr x mem)
-       for {
-               l := v_0
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               x := v_1
-               if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64SHLXLload)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
        return false
 }
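Note the companion deletion just above: the old `SHLL l:(MOVLload …) x` load-merging rule carried its own `buildcfg.GOAMD64 >= 3` guard because SHLL used to survive rewriting on v3 targets. Now that SHLL becomes SHLXL immediately, the load-merging rule moves into the new rewriteValueAMD64_OpAMD64SHLXL below and drops the GOAMD64 test, which is implied by the SHLXL op existing at all. The same relocation is applied to SHLQ, SHRL and SHRQ.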
 func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
@@ -25173,6 +26440,19 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (SHLQ x y)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (SHLXQ x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (SHLQ x (MOVQconst [c]))
        // result: (SHLQconst [int8(c&63)] x)
        for {
@@ -25363,28 +26643,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       // match: (SHLQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
-       // result: (SHLXQload [off] {sym} ptr x mem)
-       for {
-               l := v_0
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               x := v_1
-               if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64SHLXQload)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
@@ -25437,6 +26695,442 @@ func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64SHLXL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (SHLXL x (MOVQconst [c]))
+       // result: (SHLLconst [int8(c&31)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 31))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHLXL x (MOVLconst [c]))
+       // result: (SHLLconst [int8(c&31)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 31))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHLXL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLXL x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLXL x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLXL x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLXL x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SHLXLload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SHLXLload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       return false
+}
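The new SHLXL function carries three kinds of rules: constant counts fold back to SHLLconst (a plain immediate shift, so BMI2 buys nothing there), count expressions are simplified using the fact that the hardware reads only the low five bits, and a mergeable MOVLload operand becomes SHLXLload. Two source-level shapes that exercise the first two (hedged sketch):

        package shifts

        // A constant count never stays a SHLXL; it folds to SHLLconst.
        func shlConst(x uint32) uint32 { return x << 3 }

        // The explicit &31 both bounds the shift and is then dropped,
        // because SHLX masks the count mod 32 anyway (hedged).
        func shlMasked(x uint32, s uint) uint32 { return x << (s & 31) }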
+func rewriteValueAMD64_OpAMD64SHLXQ(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (SHLXQ x (MOVQconst [c]))
+       // result: (SHLQconst [int8(c&63)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 63))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHLXQ x (MOVLconst [c]))
+       // result: (SHLQconst [int8(c&63)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 63))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHLXQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLXQ x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLXQ x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLXQ x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHLXQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLXQ x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHLXQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SHLXQload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SHLXQload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       return false
+}
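SHLXQ mirrors SHLXL with 6-bit count masking. The ADDQconst/NEGQ rules look odd in isolation but are what keep rotate- and funnel-style counts cheap: 64-s is canonicalized to NEGQ(ADDQconst [-64] s), and since -64 is 0 mod 64 the ADDQconst is deleted, leaving a bare NEGQ as the count. A sketch:

        package shifts

        // (64-s)&63 equals (-s)&63, and after the rules above the count
        // is computed with a single NEGQ feeding SHLXQ (hedged sketch).
        func shlFromTop(x uint64, s uint) uint64 {
                return x << ((64 - s) & 63)
        }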
 func rewriteValueAMD64_OpAMD64SHRB(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -25524,6 +27218,19 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (SHRL x y)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (SHRXL x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (SHRL x (MOVQconst [c]))
        // result: (SHRLconst [int8(c&31)] x)
        for {
@@ -25714,28 +27421,6 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       // match: (SHRL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
-       // result: (SHRXLload [off] {sym} ptr x mem)
-       for {
-               l := v_0
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               x := v_1
-               if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64SHRXLload)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
@@ -25768,6 +27453,19 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (SHRQ x y)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (SHRXQ x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (SHRQ x (MOVQconst [c]))
        // result: (SHRQconst [int8(c&63)] x)
        for {
@@ -25958,28 +27656,6 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       // match: (SHRQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
-       // result: (SHRXQload [off] {sym} ptr x mem)
-       for {
-               l := v_0
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               x := v_1
-               if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64SHRXQload)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {
@@ -26091,6 +27767,442 @@ func rewriteValueAMD64_OpAMD64SHRWconst(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64SHRXL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (SHRXL x (MOVQconst [c]))
+       // result: (SHRLconst [int8(c&31)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64SHRLconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 31))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHRXL x (MOVLconst [c]))
+       // result: (SHRLconst [int8(c&31)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               v.reset(OpAMD64SHRLconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 31))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHRXL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHRXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHRXL x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHRXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHRXL x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHRXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHRXL x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHRXL x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHRXL x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRXL)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SHRXLload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SHRXLload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       return false
+}
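rewriteValueAMD64_OpAMD64SHRXL here, and the SHRXQ function that follows, are generated from the same rule templates as the SHLX pair, with right-shift semantics: constant counts fold back to SHRLconst/SHRQconst, count arithmetic is simplified modulo 32/64, and a mergeable load becomes SHRXLload/SHRXQload. SHRX is the logical shift; the arithmetic SARX ops were already wired up before this CL.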
+func rewriteValueAMD64_OpAMD64SHRXQ(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (SHRXQ x (MOVQconst [c]))
+       // result: (SHRQconst [int8(c&63)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64SHRQconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 63))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHRXQ x (MOVLconst [c]))
+       // result: (SHRQconst [int8(c&63)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               v.reset(OpAMD64SHRQconst)
+               v.AuxInt = int8ToAuxInt(int8(c & 63))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHRXQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHRXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHRXQ x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHRXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHRXQ x (NEGQ <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHRXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHRXQ x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHRXQ x y)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1.AuxInt)
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (SHRXQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHRXQ x (NEGL <t> y))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := auxIntToInt32(v_1_0.AuxInt)
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (SHRXQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SHRXQload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SHRXQload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64SUBL(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -27374,6 +29486,25 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                }
                break
        }
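+       // x ^ (1<<y) complements bit y of x; with the SHLXL form of the shift
+       // this lowers to a single BTCL, mirroring the existing SHLL rule.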
+       // match: (XORL (SHLXL (MOVLconst [1]) y) x)
+       // result: (BTCL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTCL)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (XORL (MOVLconst [c]) x)
        // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
        // result: (BTCLconst [int8(log32(c))] x)
@@ -27911,6 +30042,25 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                }
                break
        }
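+       // 64-bit analogue: x ^ (1<<y) built with SHLXQ lowers to BTCQ.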
+       // match: (XORQ (SHLXQ (MOVQconst [1]) y) x)
+       // result: (BTCQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLXQ {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTCQ)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
        // match: (XORQ (MOVQconst [c]) x)
        // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
        // result: (BTCQconst [int8(log64(c))] x)
@@ -34533,6 +36683,54 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        break
                }
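+               // Testing a single shifted bit becomes a BT: BT copies the selected
+               // bit into CF, so the EQ (bit clear) branch becomes UGE (CF clear).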
+               // match: (EQ (TESTL (SHLXL (MOVLconst [1]) x) y))
+               // result: (UGE (BTL x y))
+               for b.Controls[0].Op == OpAMD64TESTL {
+                       v_0 := b.Controls[0]
+                       _ = v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                               if v_0_0.Op != OpAMD64SHLXL {
+                                       continue
+                               }
+                               x := v_0_0.Args[1]
+                               v_0_0_0 := v_0_0.Args[0]
+                               if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
+                                       continue
+                               }
+                               y := v_0_1
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags)
+                               v0.AddArg2(x, y)
+                               b.resetWithControl(BlockAMD64UGE, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (EQ (TESTQ (SHLXQ (MOVQconst [1]) x) y))
+               // result: (UGE (BTQ x y))
+               for b.Controls[0].Op == OpAMD64TESTQ {
+                       v_0 := b.Controls[0]
+                       _ = v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                               if v_0_0.Op != OpAMD64SHLXQ {
+                                       continue
+                               }
+                               x := v_0_0.Args[1]
+                               v_0_0_0 := v_0_0.Args[0]
+                               if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
+                                       continue
+                               }
+                               y := v_0_1
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags)
+                               v0.AddArg2(x, y)
+                               b.resetWithControl(BlockAMD64UGE, v0)
+                               return true
+                       }
+                       break
+               }
                // match: (EQ (TESTLconst [c] x))
                // cond: isUint32PowerOfTwo(int64(c))
                // result: (UGE (BTLconst [int8(log32(c))] x))
@@ -35336,6 +37534,54 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        break
                }
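+               // The NE forms mirror the EQ rules above: a set bit leaves CF=1, so
+               // the branch becomes ULT.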
+               // match: (NE (TESTL (SHLXL (MOVLconst [1]) x) y))
+               // result: (ULT (BTL x y))
+               for b.Controls[0].Op == OpAMD64TESTL {
+                       v_0 := b.Controls[0]
+                       _ = v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                               if v_0_0.Op != OpAMD64SHLXL {
+                                       continue
+                               }
+                               x := v_0_0.Args[1]
+                               v_0_0_0 := v_0_0.Args[0]
+                               if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
+                                       continue
+                               }
+                               y := v_0_1
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags)
+                               v0.AddArg2(x, y)
+                               b.resetWithControl(BlockAMD64ULT, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (NE (TESTQ (SHLXQ (MOVQconst [1]) x) y))
+               // result: (ULT (BTQ x y))
+               for b.Controls[0].Op == OpAMD64TESTQ {
+                       v_0 := b.Controls[0]
+                       _ = v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       v_0_1 := v_0.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+                               if v_0_0.Op != OpAMD64SHLXQ {
+                                       continue
+                               }
+                               x := v_0_0.Args[1]
+                               v_0_0_0 := v_0_0.Args[0]
+                               if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
+                                       continue
+                               }
+                               y := v_0_1
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags)
+                               v0.AddArg2(x, y)
+                               b.resetWithControl(BlockAMD64ULT, v0)
+                               return true
+                       }
+                       break
+               }
                // match: (NE (TESTLconst [c] x))
                // cond: isUint32PowerOfTwo(int64(c))
                // result: (ULT (BTLconst [int8(log32(c))] x))
index 1641d5ddd08d58cd53f6a0413ad096d1366b4b6c..3b125a1b5901caf5dc5cfd7c77e41d581810d129 100644 (file)
@@ -72,7 +72,23 @@ func sarx32_load(x []int32, i int) int32 {
        return s
 }
 
-func shlrx64(x []uint64, i int, s uint64) uint64 {
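+// shlrx64 and shlrx32 check that plain variable shifts compile to the BMI2
+// SHLX/SHRX forms when GOAMD64 >= v3.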
+func shlrx64(x, y uint64) uint64 {
+       // amd64/v3:"SHRXQ"
+       s := x >> y
+       // amd64/v3:"SHLXQ"
+       s = s << y
+       return s
+}
+
+func shlrx32(x, y uint32) uint32 {
+       // amd64/v3:"SHRXL"
+       s := x >> y
+       // amd64/v3:"SHLXL"
+       s = s << y
+       return s
+}
+
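+// The renamed _load variants keep exercising the load-folded forms, where
+// the value being shifted comes directly from memory.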
+func shlrx64_load(x []uint64, i int, s uint64) uint64 {
        // amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
        s = x[i] >> i
        // amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
@@ -80,7 +96,7 @@ func shlrx64(x []uint64, i int, s uint64) uint64 {
        return s
 }
 
-func shlrx32(x []uint32, i int, s uint32) uint32 {
+func shlrx32_load(x []uint32, i int, s uint32) uint32 {
        // amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
        s = x[i] >> i
        // amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`