cmd/compile: automatically handle commuting ops in rewrite rules
author     Keith Randall <khr@golang.org>
           Sat, 25 Mar 2017 22:05:42 +0000 (15:05 -0700)
committer  Keith Randall <khr@golang.org>
           Wed, 29 Mar 2017 16:22:09 +0000 (16:22 +0000)
We have lots of rewrite rules that exist in 2 versions only because
they must match the 2 possible argument orderings of commuting ops.
For example:

(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)

It can get unwieldy quickly, especially when there is more than
one commuting op in a rule.

Our existing "fix" for this problem is to have rules that
canonicalize the operations first. For example:

(Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x)

Subsequent rules can then assume that if there is a constant arg to Eq64,
it will be the first one. This fix kinda works, but it is fragile and
only works when we remember to include the required extra rules.

The fundamental problem is that the rule matcher doesn't
know anything about commuting ops. This CL fixes that.

We already have information about which ops commute. (The register
allocator takes advantage of commutativity.) The rule generator now
automatically generates multiple rules for a single source rule when
there are commutative ops in the rule. We can now drop all of our
almost-duplicate source-level rules and the canonicalization rules.
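
For example, with ADDL marked commutative, only one source rule is
needed:

(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)

and the generator emits matchers for both this ordering and
(ADDL (MOVLconst [c]) x). A rule containing k commutative
(two-argument) ops expands into up to 2^k generated orderings, which
is why the deep OR chains below had to be reorganized.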

I have some CLs in progress that will be a lot less verbose when
the rule generator handles commutativity for me.

I had to reorganize the load-combining rules a bit. The 8-way OR rules
generated 128 different reorderings, which was causing the generator
to put too much code in the rewrite*.go files (the big ones were going
from 25K lines to 132K lines). Instead I reorganized the rules to
combine pairs of loads at a time. The generated rule files are now
actually a bit (5%) smaller.
[Note to reviewers: check these carefully. Most of the other rule
changes are trivial.]
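
The pairwise scheme builds a wide load as a ladder of 2-way merges.
Roughly, the AMD64 shapes are (Uses, clobber, and mergePoint conditions
omitted here; the full rules are in the diff below):

(ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8]  x1:(MOVBload [i1] {s} p mem))) && i1 == i0+1 -> (MOVWload [i0] {s} p mem)
(ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) && i1 == i0+2 -> (MOVLload [i0] {s} p mem)
(ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) && i1 == i0+4 -> (MOVQload [i0] {s} p mem)

Each pairwise rule contains only one commutative OR, so commuting it
doubles the generated code rather than multiplying an 8-way match by 128.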

Make.bash times are ~unchanged.

Compiler benchmarks are not observably different, probably because
we don't spend much compiler time in rule matching anyway.

I've also done a pass over all of our ops, adding commutative markings
to ops that didn't have them previously.
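
For example, in AMD64Ops.go (see the diff below):

{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"},       // (arg0 & arg1) compare to 0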

Fixes #18292

Change-Id: I999b1307272e91965b66754576019dedcbe7527a
Reviewed-on: https://go-review.googlesource.com/38666
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
26 files changed:
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/MIPS.rules
src/cmd/compile/internal/ssa/gen/MIPS64.rules
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/S390XOps.go
src/cmd/compile/internal/ssa/gen/generic.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/gen/rulegen.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssa/rewriteMIPS64.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/ssa/rewritedec.go
src/cmd/compile/internal/ssa/rewritegeneric.go

index 13d9bb935f3b71e49d8372dc312d762ce3108f24..c67796ea09ce95cff328d707f6a0fef06148ca90 100644 (file)
 
 // fold constants into instructions
 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
-(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)
 (ADDLcarry x (MOVLconst [c])) -> (ADDLconstcarry [c] x)
-(ADDLcarry (MOVLconst [c]) x) -> (ADDLconstcarry [c] x)
 (ADCL x (MOVLconst [c]) f) -> (ADCLconst [c] x f)
 (ADCL (MOVLconst [c]) x f) -> (ADCLconst [c] x f)
 
 (SBBL x (MOVLconst [c]) f) -> (SBBLconst [c] x f)
 
 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
-(MULL (MOVLconst [c]) x) -> (MULLconst [c] x)
 
 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
-(ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x)
 
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 
 (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
 
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
-(ORL (MOVLconst [c]) x) -> (ORLconst [c] x)
 
 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
-(XORL (MOVLconst [c]) x) -> (XORLconst [c] x)
 
 (SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
 (SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
 
 // Rotate instructions
 
-(ADDL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-( ORL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-(XORL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-(ADDL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-( ORL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-(XORL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-
-(ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-(XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-(XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
+(ADDL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+( ORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+(XORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+
+(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
 
 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
 (MULLconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
 (MULLconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
 (MULLconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
-(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
-(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
-(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
+(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
+(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
+(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
 
 // combine add/shift into LEAL
 (ADDL x (SHLLconst [3] y)) -> (LEAL8 x y)
 (ADDL x (SHLLconst [1] y)) -> (LEAL2 x y)
 (ADDL x (ADDL y y)) -> (LEAL2 x y)
 (ADDL x (ADDL x y)) -> (LEAL2 y x)
-(ADDL x (ADDL y x)) -> (LEAL2 y x)
 
 // combine ADDL/ADDLconst into LEAL1
 (ADDLconst [c] (ADDL x y)) -> (LEAL1 [c] x y)
 (ADDL (ADDLconst [c] x) y) -> (LEAL1 [c] x y)
-(ADDL x (ADDLconst [c] y)) -> (LEAL1 [c] x y)
 
 // fold ADDL into LEAL
 (ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
 (LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
 (LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
 (ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
-(ADDL (LEAL [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
 
 // fold ADDLconst into LEALx
 (ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(c+d) -> (LEAL1 [c+d] {s} x y)
 (ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(c+d) -> (LEAL4 [c+d] {s} x y)
 (ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(c+d) -> (LEAL8 [c+d] {s} x y)
 (LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL1 [c+d] {s} x y)
-(LEAL1 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+d)   && y.Op != OpSB -> (LEAL1 [c+d] {s} x y)
 (LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL2 [c+d] {s} x y)
 (LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAL2 [c+2*d] {s} x y)
 (LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL4 [c+d] {s} x y)
 
 // fold shifts into LEALx
 (LEAL1 [c] {s} x (SHLLconst [1] y)) -> (LEAL2 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [1] x) y) -> (LEAL2 [c] {s} y x)
 (LEAL1 [c] {s} x (SHLLconst [2] y)) -> (LEAL4 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [2] x) y) -> (LEAL4 [c] {s} y x)
 (LEAL1 [c] {s} x (SHLLconst [3] y)) -> (LEAL8 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [3] x) y) -> (LEAL8 [c] {s} y x)
-
 (LEAL2 [c] {s} x (SHLLconst [1] y)) -> (LEAL4 [c] {s} x y)
 (LEAL2 [c] {s} x (SHLLconst [2] y)) -> (LEAL8 [c] {s} x y)
 (LEAL4 [c] {s} x (SHLLconst [1] y)) -> (LEAL8 [c] {s} x y)
 // LEAL into LEAL1
 (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
        (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAL1 [off1] {sym1} x (LEAL [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
-       (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
 // LEAL1 into LEAL
 (LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORL x:(SHLLconst _) y) && y.Op != Op386SHLLconst -> (ORL y x)
-
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
+(ORL                  x0:(MOVBload [i0] {s} p mem)
+    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 
 (ORL o0:(ORL
-                       x0:(MOVWload [i]   {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
+                       x0:(MOVWload [i0] {s} p mem)
+    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
+    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+  && i2 == i0+2
+  && i3 == i0+3
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && clobber(s0)
   && clobber(s1)
   && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
 
-(ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
+(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1==i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
 
 (ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
+                       x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
+    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+  && i2 == i0+2
+  && i3 == i0+3
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && clobber(s0)
   && clobber(s1)
   && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
 
 // Combine constant stores into larger (unaligned) stores.
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
index 2a638ff1edbec75d59ba17110c3926881a25224d..b287775194ffdd2cfafe593ba5d69ca8d9f30520 100644 (file)
@@ -193,10 +193,10 @@ func init() {
                {name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
                {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
 
-               {name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
 
-               {name: "MULLQU", argLength: 2, reg: gp21mul, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
+               {name: "MULLQU", argLength: 2, reg: gp21mul, commutative: true, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
 
                {name: "AVGLU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 32 result bits
 
@@ -229,9 +229,9 @@ func init() {
                {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f32
                {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f64
 
-               {name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
+               {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0
                {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
                {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
                {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
@@ -314,7 +314,7 @@ func init() {
                {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
 
                {name: "LEAL", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
-               {name: "LEAL1", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + arg1 + auxint + aux
+               {name: "LEAL1", argLength: 2, reg: gp21sb, commutative: true, aux: "SymOff", symEffect: "Addr"},      // arg0 + arg1 + auxint + aux
                {name: "LEAL2", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 2*arg1 + auxint + aux
                {name: "LEAL4", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 4*arg1 + auxint + aux
                {name: "LEAL8", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 8*arg1 + auxint + aux
@@ -331,17 +331,17 @@ func init() {
                {name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},    // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
 
                // indexed loads/stores
-               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
+               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"},                    // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},                       // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
                // TODO: sign-extending indexed loads
-               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
+               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                    // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"},                    // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
                // TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
 
                // For storeconst ops, the AuxInt field encodes both
index 2e3e6c01ba91b7404e7b7be183a0854a37eb62cb..ac45cd71e51b4403a976a76497ee02fd56ebaa51 100644 (file)
 (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no) -> (UGT cmp yes no)
 (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
 
-// Normalize TESTx argument order for BTx rewrites below.
-(TESTQ y x:(SHLQ _ _)) && y.Op != OpAMD64SHLQ -> (TESTQ x y)
-(TESTL y x:(SHLL _ _)) && y.Op != OpAMD64SHLL -> (TESTL x y)
-
 // Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
 // Note that ULT and SETB check the carry flag; they are identical to CS and SETCS.
 // Same, mutatis mutandis, for UGE and SETAE, and CC and SETCC.
 
 // fold constants into instructions
 (ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
-(ADDQ (MOVQconst [c]) x) && is32Bit(c) -> (ADDQconst [c] x)
 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
-(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)
 
 (SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
 (SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
 (SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
 
 (MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
-(MULQ (MOVQconst [c]) x) && is32Bit(c) -> (MULQconst [c] x)
 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
-(MULL (MOVLconst [c]) x) -> (MULLconst [c] x)
 
 (ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
-(ANDQ (MOVQconst [c]) x) && is32Bit(c) -> (ANDQconst [c] x)
 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
-(ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x)
 
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 (ANDQconst [c] (ANDQconst [d] x)) -> (ANDQconst [c & d] x)
 (MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
 
 (ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
-(ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x)
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
-(ORL (MOVLconst [c]) x) -> (ORLconst [c] x)
 
 (XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
-(XORQ (MOVQconst [c]) x) && is32Bit(c) -> (XORQconst [c] x)
 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
-(XORL (MOVLconst [c]) x) -> (XORLconst [c] x)
 
 (SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x)
 (SHLQ x (MOVLconst [c])) -> (SHLQconst [c&63] x)
 
 // Rotate instructions
 
-(ADDQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-( ORQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-(XORQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-(ADDQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-( ORQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-(XORQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-
-(ADDL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-( ORL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-(XORL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-(ADDL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-( ORL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-(XORL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-(XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-(XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
+(ADDQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+( ORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+(XORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+
+(ADDL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+( ORL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+(XORL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
 
 (ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
 (MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
 (MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
 (MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
-(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
-(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
-(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
 
 // combine add/shift into LEAQ
 (ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
 (ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
 (ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
 (ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
-(ADDQ x (ADDQ y x)) -> (LEAQ2 y x)
 
 // combine ADDQ/ADDQconst into LEAQ1
 (ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
 (ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
-(ADDQ x (ADDQconst [c] y)) -> (LEAQ1 [c] x y)
 
 // fold ADDQ into LEAQ
 (ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
 (LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
 (LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
 (ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
-(ADDQ (LEAQ [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
 
 // fold ADDQconst into LEAQx
 (ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y)
 (ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y)
 (ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y)
 (LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
-(LEAQ1 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+d)   && y.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
 (LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
 (LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
 (LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
 
 // fold shifts into LEAQx
 (LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [1] x) y) -> (LEAQ2 [c] {s} y x)
 (LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [2] x) y) -> (LEAQ4 [c] {s} y x)
 (LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [3] x) y) -> (LEAQ8 [c] {s} y x)
-
 (LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y)
 (LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y)
 (LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y)
 // LEAQ into LEAQ1
 (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
        (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
-       (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
 // LEAQ1 into LEAQ
 (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 (TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)
 (TESTW (MOVLconst [c]) x) -> (TESTWconst [c] x)
 (TESTB (MOVLconst [c]) x) -> (TESTBconst [c] x)
-(TESTQ x (MOVQconst [c])) && is32Bit(c) -> (TESTQconst [c] x)
-(TESTL x (MOVLconst [c])) -> (TESTLconst [c] x)
-(TESTW x (MOVLconst [c])) -> (TESTWconst [c] x)
-(TESTB x (MOVLconst [c])) -> (TESTBconst [c] x)
 
 // TEST %reg,%reg is shorter than CMP
 (CMPQconst x [0]) -> (TESTQ x x)
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORQ x:(SHLQconst _) y) && y.Op != OpAMD64SHLQconst -> (ORQ y x)
-(ORL x:(SHLLconst _) y) && y.Op != OpAMD64SHLLconst -> (ORL y x)
-
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
+
+// Little-endian loads
+
+(ORL                  x0:(MOVBload [i0] {s} p mem)
+    sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
-  && s0.Uses == 1
+  && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 
-(ORL o0:(ORL
-                       x0:(MOVWload [i]   {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
+(ORQ                  x0:(MOVBload [i0] {s} p mem)
+    sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+
+(ORL                   x0:(MOVWload [i0] {s} p mem)
+    sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+
+(ORQ                   x0:(MOVWload [i0] {s} p mem)
+    sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+
+(ORQ                   x0:(MOVLload [i0] {s} p mem)
+    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+
+(ORL
+    s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
+    or:(ORL
+        s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
-
-(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ
-                       x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLQconst [8]  x1:(MOVBload [i+1] {s} p mem)))
-    s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))
-    s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))
-    s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))
-    s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))
-    s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))
-    s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
-
-(ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
 
-(ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
+// Little-endian indexed loads
+
+(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+
+(ORQ                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+
+(ORL                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+
+(ORQ                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+
+(ORQ                   x0:(MOVLloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+
+(ORL
+    s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+    or:(ORL
+        s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
-
-(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ
-                       x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-    s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))
-    s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))
-    s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))
-    s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))
-    s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
-
-// Combine 2 byte loads + shifts into (unaligned) word load + rolw 8
-(ORL
-                       x0:(MOVBload [i] {s} p mem)
-    s0:(SHLLconst [8]  x1:(MOVBload [i-1] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i-1] {s} p mem))
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+
+// Big-endian loads
+
+(ORL
+                       x1:(MOVBload [i1] {s} p mem)
+    sh:(SHLLconst [8]  x0:(MOVBload [i0] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+
+(ORQ
+                       x1:(MOVBload [i1] {s} p mem)
+    sh:(SHLQconst [8]  x0:(MOVBload [i0] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+
+(ORL
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
+    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+
+(ORQ
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
+    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+
+(ORQ
+                        r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
+    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
 
 (ORL
-                       x0:(MOVBloadidx1 [i] {s} p idx mem)
-    s0:(SHLLconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
+    s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
+    or:(ORL
+        s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 <v.Type> [i-1] {s} p idx mem))
-
-// Combine byte loads + shifts into larger (unaligned) loads + bswap
-// (for L version first 2 bytes loads are matched as result of above 2-bytes load+shift rewrite)
-(ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWload [i1] {s} p mem))
-    s1:(SHLLconst [16] x2:(MOVBload [i1-1] {s} p mem)))
-    s2:(SHLLconst [24] x3:(MOVBload [i1-2] {s} p mem)))
-  && x01.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
+    or:(ORQ
+        s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && mergePoint(b,x01,x2,x3) != nil
-  && clobber(x01)
-  && clobber(x2)
-  && clobber(x3)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(o0)
-  && clobber(o1)
-  -> @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLload [i1-2] {s} p mem))
-
-(ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWloadidx1 [i1] {s} p idx mem))
-    s1:(SHLLconst [16] x2:(MOVBloadidx1 [i1-1] {s} p idx mem)))
-    s2:(SHLLconst [24] x3:(MOVBloadidx1 [i1-2] {s} p idx mem)))
-  && x01.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
+    or:(ORQ
+        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && mergePoint(b,x01,x2,x3) != nil
-  && clobber(x01)
-  && clobber(x2)
-  && clobber(x3)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(o0)
-  && clobber(o1)
-  -> @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLloadidx1 <v.Type> [i1-2] {s} p idx mem))
-
-(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
-                       x0:(MOVBload [i] {s} p mem)
-    s0:(SHLQconst [8]  x1:(MOVBload [i-1] {s} p mem)))
-    s1:(SHLQconst [16] x2:(MOVBload [i-2] {s} p mem)))
-    s2:(SHLQconst [24] x3:(MOVBload [i-3] {s} p mem)))
-    s3:(SHLQconst [32] x4:(MOVBload [i-4] {s} p mem)))
-    s4:(SHLQconst [40] x5:(MOVBload [i-5] {s} p mem)))
-    s5:(SHLQconst [48] x6:(MOVBload [i-6] {s} p mem)))
-    s6:(SHLQconst [56] x7:(MOVBload [i-7] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+
+// Big-endian indexed loads
+
+(ORL
+                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
+    sh:(SHLLconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
+    sh:(SHLQconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+
+(ORL
+                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                        r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
+    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+
+(ORL
+    s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    or:(ORL
+        s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQload [i-7] {s} p mem))
-
-(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
-                       x0:(MOVBloadidx1 [i] {s} p idx mem)
-    s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
-    s1:(SHLQconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))
-    s2:(SHLQconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))
-    s3:(SHLQconst [32] x4:(MOVBloadidx1 [i-4] {s} p idx mem)))
-    s4:(SHLQconst [40] x5:(MOVBloadidx1 [i-5] {s} p idx mem)))
-    s5:(SHLQconst [48] x6:(MOVBloadidx1 [i-6] {s} p idx mem)))
-    s6:(SHLQconst [56] x7:(MOVBloadidx1 [i-7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    or:(ORQ
+        s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQloadidx1 <v.Type> [i-7] {s} p idx mem))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+    or:(ORQ
+        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(s0)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
 
 // Combine 2 byte stores + shift into rolw 8 + word store
 (MOVBstore [i] {s} p w
 // Merge load and op
 // TODO: add indexed variants?
 (ADDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
-(ADDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
 (ADDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
-(ADDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
 (SUBQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem)
 (SUBL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem)
 (ANDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
-(ANDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
 (ANDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
-(ANDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
-(ORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
-(ORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
-(ORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
-(ORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
+(ORQ  x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
+(ORL  x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
 (XORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
-(XORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
 (XORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
-(XORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
 (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
-(ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
 (ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
-(ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
 (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem)
 (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem)
 (MULSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
-(MULSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
 (MULSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
-(MULSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
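[Editor's sketch] These merge rules fire when a loaded value has exactly one use, as an operand of the arithmetic op; with the reversed-operand variants gone, the generator's commutativity handling covers both orderings. A minimal Go-level sketch with an illustrative name:

    package example

    // addFromMem has the shape the ADDQ -> ADDQmem rule targets: *p is loaded
    // and used once, so the load can fold into the add as a memory operand
    // instead of a separate MOVQ followed by a register ADDQ.
    func addFromMem(x int64, p *int64) int64 {
            return x + *p
    }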
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index f9731047e7e7ccca350d375455b3cefcf86b87ab..6f0845305c8b8282f088a0ab81010a6afd4d8ad3 100644 (file)
@@ -202,10 +202,10 @@ func init() {
                {name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
                {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
 
-               {name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULQ", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULQU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
 
                {name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
 
@@ -216,8 +216,8 @@ func init() {
                {name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
                {name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
 
-               {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, asm: "MULQ", clobberFlags: true},     // arg0 * arg1, returns (hi, lo)
-               {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
+               {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo)
+               {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true},                // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
 
                {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
                {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
@@ -251,43 +251,43 @@ func init() {
                {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
                {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
 
-               {name: "TESTQ", argLength: 2, reg: gp2flags, asm: "TESTQ", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
+               {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0
                {name: "TESTQconst", argLength: 1, reg: gp1flags, asm: "TESTQ", typ: "Flags", aux: "Int64"}, // (arg0 & auxint) compare to 0
                {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
                {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
                {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
 
-               {name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 64
-               {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-               {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
-               {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
+               {name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},              // arg0 << arg1, shift amount is mod 64
+               {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},              // arg0 << arg1, shift amount is mod 32
+               {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
+               {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
                // Note: x86 is weird, the 16 and 8 byte shifts still use all 5 bits of shift amount!
 
-               {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 64
-               {name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
-               {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-               {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-15
-               {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-7
-
-               {name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 64
-               {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-               {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-               {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15
-               {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-7
-
-               {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
-               {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-               {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-               {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
+               {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 64
+               {name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
+               {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
+               {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-15
+               {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-7
+
+               {name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 64
+               {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
+               {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
+               {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15
+               {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-7
+
+               {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
+               {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
+               {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
+               {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7
 
                {name: "ADDLmem", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
                {name: "ADDQmem", argLength: 3, reg: gp21load, asm: "ADDQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
@@ -374,7 +374,7 @@ func init() {
                {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
 
                {name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
-               {name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + arg1 + auxint + aux
+               {name: "LEAQ1", argLength: 2, reg: gp21sb, commutative: true, aux: "SymOff", symEffect: "Addr"},                   // arg0 + arg1 + auxint + aux
                {name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 2*arg1 + auxint + aux
                {name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 4*arg1 + auxint + aux
                {name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 8*arg1 + auxint + aux
@@ -398,21 +398,21 @@ func init() {
                {name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
 
                // indexed loads/stores
-               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
-               {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", symEffect: "Read"},    // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
+               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", symEffect: "Read"},  // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", symEffect: "Read"},                    // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"},                       // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
+               {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"},                       // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
                // TODO: sign-extending indexed loads
-               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
-               {name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
+               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                    // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"},                    // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
+               {name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVQ", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff", symEffect: "Write"},                    // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
                // TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
 
                // For storeconst ops, the AuxInt field encodes both
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index 5ab695c76bb5ebd9149dc08013e0a94f7b1113df..0ecb6d09c04b8a03898ddea9bc495f8c932c120e 100644 (file)
 (MOVWloadshiftRA ptr idx [c] (MOVWstoreshiftRA ptr2 idx [d] x _)) && c==d && isSamePtr(ptr, ptr2) -> x
 
 // fold constant into arithmetic ops
-(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVWconst [c])) -> (ADDconst [c] x)
 (SUB (MOVWconst [c]) x) -> (RSBconst [c] x)
 (SUB x (MOVWconst [c])) -> (SUBconst [c] x)
 (RSB (MOVWconst [c]) x) -> (SUBconst [c] x)
 (RSB x (MOVWconst [c])) -> (RSBconst [c] x)
 
-(ADDS (MOVWconst [c]) x) -> (ADDSconst [c] x)
 (ADDS x (MOVWconst [c])) -> (ADDSconst [c] x)
-(SUBS (MOVWconst [c]) x) -> (RSBSconst [c] x)
 (SUBS x (MOVWconst [c])) -> (SUBSconst [c] x)
 
 (ADC (MOVWconst [c]) x flags) -> (ADCconst [c] x flags)
 (SBC (MOVWconst [c]) x flags) -> (RSCconst [c] x flags)
 (SBC x (MOVWconst [c]) flags) -> (SBCconst [c] x flags)
 
-(AND (MOVWconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVWconst [c])) -> (ANDconst [c] x)
-(OR (MOVWconst [c]) x) -> (ORconst [c] x)
-(OR x (MOVWconst [c])) -> (ORconst [c] x)
-(XOR (MOVWconst [c]) x) -> (XORconst [c] x)
+(OR  x (MOVWconst [c])) -> (ORconst [c] x)
 (XOR x (MOVWconst [c])) -> (XORconst [c] x)
 (BIC x (MOVWconst [c])) -> (BICconst [c] x)
 
 (MUL x (MOVWconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
 (MUL x (MOVWconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MUL (MOVWconst [c]) x) && int32(c) == -1 -> (RSBconst [0] x)
-(MUL (MOVWconst [0]) _) -> (MOVWconst [0])
-(MUL (MOVWconst [1]) x) -> x
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (RSBshiftLL x x [log2(c+1)])
-(MUL (MOVWconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL (MOVWconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL (MOVWconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
-(MUL (MOVWconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 (MULA x (MOVWconst [c]) a) && int32(c) == -1 -> (SUB a x)
 (MULA _ (MOVWconst [0]) a) -> a
 (MULA x (MOVWconst [1]) a) -> (ADD x a)
 
 // absorb shifts into ops
 (ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
-(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
 (ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
-(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
 (ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
-(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
 (ADD x (SLL y z)) -> (ADDshiftLLreg x y z)
-(ADD (SLL y z) x) -> (ADDshiftLLreg x y z)
 (ADD x (SRL y z)) -> (ADDshiftRLreg x y z)
-(ADD (SRL y z) x) -> (ADDshiftRLreg x y z)
 (ADD x (SRA y z)) -> (ADDshiftRAreg x y z)
-(ADD (SRA y z) x) -> (ADDshiftRAreg x y z)
 (ADC x (SLLconst [c] y) flags) -> (ADCshiftLL x y [c] flags)
 (ADC (SLLconst [c] y) x flags) -> (ADCshiftLL x y [c] flags)
 (ADC x (SRLconst [c] y) flags) -> (ADCshiftRL x y [c] flags)
 (ADC x (SRA y z) flags) -> (ADCshiftRAreg x y z flags)
 (ADC (SRA y z) x flags) -> (ADCshiftRAreg x y z flags)
 (ADDS x (SLLconst [c] y)) -> (ADDSshiftLL x y [c])
-(ADDS (SLLconst [c] y) x) -> (ADDSshiftLL x y [c])
 (ADDS x (SRLconst [c] y)) -> (ADDSshiftRL x y [c])
-(ADDS (SRLconst [c] y) x) -> (ADDSshiftRL x y [c])
 (ADDS x (SRAconst [c] y)) -> (ADDSshiftRA x y [c])
-(ADDS (SRAconst [c] y) x) -> (ADDSshiftRA x y [c])
 (ADDS x (SLL y z)) -> (ADDSshiftLLreg x y z)
-(ADDS (SLL y z) x) -> (ADDSshiftLLreg x y z)
 (ADDS x (SRL y z)) -> (ADDSshiftRLreg x y z)
-(ADDS (SRL y z) x) -> (ADDSshiftRLreg x y z)
 (ADDS x (SRA y z)) -> (ADDSshiftRAreg x y z)
-(ADDS (SRA y z) x) -> (ADDSshiftRAreg x y z)
 (SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
 (SUB (SLLconst [c] y) x) -> (RSBshiftLL x y [c])
 (SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
 (RSB x (SRA y z)) -> (RSBshiftRAreg x y z)
 (RSB (SRA y z) x) -> (SUBshiftRAreg x y z)
 (AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
-(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
 (AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
-(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
 (AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
-(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
 (AND x (SLL y z)) -> (ANDshiftLLreg x y z)
-(AND (SLL y z) x) -> (ANDshiftLLreg x y z)
 (AND x (SRL y z)) -> (ANDshiftRLreg x y z)
-(AND (SRL y z) x) -> (ANDshiftRLreg x y z)
 (AND x (SRA y z)) -> (ANDshiftRAreg x y z)
-(AND (SRA y z) x) -> (ANDshiftRAreg x y z)
 (OR x (SLLconst [c] y)) -> (ORshiftLL x y [c])
-(OR (SLLconst [c] y) x) -> (ORshiftLL x y [c])
 (OR x (SRLconst [c] y)) -> (ORshiftRL x y [c])
-(OR (SRLconst [c] y) x) -> (ORshiftRL x y [c])
 (OR x (SRAconst [c] y)) -> (ORshiftRA x y [c])
-(OR (SRAconst [c] y) x) -> (ORshiftRA x y [c])
 (OR x (SLL y z)) -> (ORshiftLLreg x y z)
-(OR (SLL y z) x) -> (ORshiftLLreg x y z)
 (OR x (SRL y z)) -> (ORshiftRLreg x y z)
-(OR (SRL y z) x) -> (ORshiftRLreg x y z)
 (OR x (SRA y z)) -> (ORshiftRAreg x y z)
-(OR (SRA y z) x) -> (ORshiftRAreg x y z)
 (XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
-(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
 (XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
-(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
 (XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
-(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
 (XOR x (SRRconst [c] y)) -> (XORshiftRR x y [c])
-(XOR (SRRconst [c] y) x) -> (XORshiftRR x y [c])
 (XOR x (SLL y z)) -> (XORshiftLLreg x y z)
-(XOR (SLL y z) x) -> (XORshiftLLreg x y z)
 (XOR x (SRL y z)) -> (XORshiftRLreg x y z)
-(XOR (SRL y z) x) -> (XORshiftRLreg x y z)
 (XOR x (SRA y z)) -> (XORshiftRAreg x y z)
-(XOR (SRA y z) x) -> (XORshiftRAreg x y z)
 (BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
 (BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
 (BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
 
 // generic simplifications
 (ADD x (RSBconst [0] y)) -> (SUB x y)
-(ADD (RSBconst [0] y) x) -> (SUB x y)
 (ADD <t> (RSBconst [c] x) (RSBconst [d] y)) -> (RSBconst [c+d] (ADD <t> x y))
 (SUB x x) -> (MOVWconst [0])
 (RSB x x) -> (MOVWconst [0])
 (BIC x x) -> (MOVWconst [0])
 
 (ADD (MUL x y) a) -> (MULA x y a)
-(ADD a (MUL x y)) -> (MULA x y a)
 
 (AND x (MVN y)) -> (BIC x y)
-(AND (MVN y) x) -> (BIC x y)
 
 // simplification with *shift ops
 (SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (AND x (MVNshiftLL y [c])) -> (BICshiftLL x y [c])
-(AND (MVNshiftLL y [c]) x) -> (BICshiftLL x y [c])
 (AND x (MVNshiftRL y [c])) -> (BICshiftRL x y [c])
-(AND (MVNshiftRL y [c]) x) -> (BICshiftRL x y [c])
 (AND x (MVNshiftRA y [c])) -> (BICshiftRA x y [c])
-(AND (MVNshiftRA y [c]) x) -> (BICshiftRA x y [c])
 
 // floating point optimizations
 (CMPF x (MOVFconst [0])) -> (CMPF0 x)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 41661082c7cd421953faaa89e565767f750e3fd6..6adbaf5ba2868f133c105cf345f0a73f18618afe 100644 (file)
 (MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
 
 // fold constant into arithmetic ops
-(ADD (MOVDconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVDconst [c])) -> (ADDconst [c] x)
 (SUB x (MOVDconst [c])) -> (SUBconst [c] x)
-(AND (MOVDconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVDconst [c])) -> (ANDconst [c] x)
-(OR  (MOVDconst [c]) x) -> (ORconst  [c] x)
 (OR  x (MOVDconst [c])) -> (ORconst  [c] x)
-(XOR (MOVDconst [c]) x) -> (XORconst [c] x)
 (XOR x (MOVDconst [c])) -> (XORconst [c] x)
 (BIC x (MOVDconst [c])) -> (BICconst [c] x)
 
 (MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 (MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MUL (MOVDconst [-1]) x) -> (NEG x)
-(MUL (MOVDconst [0]) _) -> (MOVDconst [0])
-(MUL (MOVDconst [1]) x) -> x
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MUL (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MUL (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
 (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
 (MULW x (MOVDconst [c])) && int32(c)==1 -> x
 (MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 (MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MULW (MOVDconst [c]) x) && int32(c)==-1 -> (NEG x)
-(MULW (MOVDconst [c]) _) && int32(c)==0 -> (MOVDconst [0])
-(MULW (MOVDconst [c]) x) && int32(c)==1 -> x
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MULW (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MULW (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MULW (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MULW (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
 (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
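[Editor's sketch] The power-of-two case is plain strength reduction: an unsigned divide by 2^k becomes a logical right shift by k. A one-line illustration (div8 is just an example name):

    package example

    // div8 divides by a power of two; the rules above express exactly this
    // reduction for the unsigned case, x/8 -> x>>3 (SRLconst [3]).
    func div8(x uint64) uint64 {
            return x / 8
    }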
 
 // generic simplifications
 (ADD x (NEG y)) -> (SUB x y)
-(ADD (NEG y) x) -> (SUB x y)
 (SUB x x) -> (MOVDconst [0])
 (AND x x) -> x
 (OR  x x) -> x
 
 // absorb shifts into ops
 (ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
-(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
 (ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
-(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
 (ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
-(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
 (SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
 (SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
 (SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
 (AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
-(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
 (AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
-(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
 (AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
-(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
-(OR  x s:(SLLconst [c] y)) && s.Uses == 1 && clobber(s) -> (ORshiftLL  x y [c]) // useful for combined load
-(OR  s:(SLLconst [c] y) x) && s.Uses == 1 && clobber(s) -> (ORshiftLL  x y [c])
-(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c])
-(OR  (SLLconst [c] y) x) -> (ORshiftLL  x y [c])
+(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c]) // useful for combined load
 (OR  x (SRLconst [c] y)) -> (ORshiftRL  x y [c])
-(OR  (SRLconst [c] y) x) -> (ORshiftRL  x y [c])
 (OR  x (SRAconst [c] y)) -> (ORshiftRA  x y [c])
-(OR  (SRAconst [c] y) x) -> (ORshiftRA  x y [c])
 (XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
-(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
 (XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
-(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
 (XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
-(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
 (BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
 (BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
 (BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
 // little endian loads
 // b[0] | b[1]<<8 -> load 16-bit
 (ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       && i1 == i0+1
        && x0.Uses == 1 && x1.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1
        && mergePoint(b,x0,x1) != nil
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
-       -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUload [i]   {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i+2] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+3] {s} p mem)))
+                   x0:(MOVHUload [i0] {s} p mem)
+       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
        && y1.Uses == 1 && y2.Uses == 1
        && o0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2)
        && clobber(y1) && clobber(y2)
        && clobber(o0)
-       -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUload [i]   {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i+4] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+6] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i+7] {s} p mem)))
+                   x0:(MOVWUload [i0] {s} p mem)
+       y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
        && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
        && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
-       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i-3] p) mem)
+       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i-5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i-6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i-7] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
+       y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-7] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // big endian loads
 // b[1] | b[0]<<8 -> load 16-bit, reverse
 (ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       && ((i-1)%2 == 0 || i-1<256 && i-1>-256 && !isArg(s) && !isAuto(s))
+       y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && (i0%2 == 0 || i0<256 && i0>-256 && !isArg(s) && !isAuto(s))
        && x0.Uses == 1 && x1.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1
        && mergePoint(b,x0,x1) != nil
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
-       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i-1] {s} p mem))
+       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
 
 // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
-       y0:(REV16W  x0:(MOVHUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
+       y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
        && o0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2)
        && clobber(y0) && clobber(y1) && clobber(y2)
        && clobber(o0)
-       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i-2] p) mem))
+       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-       y0:(REVW    x0:(MOVWUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
+       y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
-       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-4] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i+4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i+5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i+6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i+7] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
+       y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS.rules b/src/cmd/compile/internal/ssa/gen/MIPS.rules
index 3f40951d3bf78456e6549cc3400df19b465ab74b..c4130aa57a4165ea09da94ff18991db8519e6fff 100644 (file)
 (MOVWreg x) && x.Uses == 1 -> (MOVWnop x)
 
 // fold constant into arithmetic ops
-(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVWconst [c])) -> (ADDconst [c] x)
 (SUB x (MOVWconst [c])) -> (SUBconst [c] x)
-(AND (MOVWconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVWconst [c])) -> (ANDconst [c] x)
-(OR  (MOVWconst [c]) x) -> (ORconst  [c] x)
 (OR  x (MOVWconst [c])) -> (ORconst  [c] x)
-(XOR (MOVWconst [c]) x) -> (XORconst [c] x)
 (XOR x (MOVWconst [c])) -> (XORconst [c] x)
-(NOR (MOVWconst [c]) x) -> (NORconst [c] x)
 (NOR x (MOVWconst [c])) -> (NORconst [c] x)
 
 (SLL _ (MOVWconst [c])) && uint32(c)>=32 -> (MOVWconst [0])
 
 // generic simplifications
 (ADD x (NEG y)) -> (SUB x y)
-(ADD (NEG y) x) -> (SUB x y)
 (SUB x x) -> (MOVWconst [0])
 (SUB (MOVWconst [0]) x) -> (NEG x)
 (AND x x) -> x
 
 // conditional move
 (CMOVZ _ b (MOVWconst [0])) -> b
-(CMOVZ a _ (MOVWconst [c])) && c!=0-> a
+(CMOVZ a _ (MOVWconst [c])) && c!=0 -> a
 (CMOVZzero _ (MOVWconst [0])) -> (MOVWconst [0])
-(CMOVZzero a (MOVWconst [c])) && c!=0-> a
+(CMOVZzero a (MOVWconst [c])) && c!=0 -> a
 (CMOVZ a (MOVWconst [0]) c) -> (CMOVZzero a c)
 
 // atomic
 (LoweredAtomicStore ptr (MOVWconst [0]) mem) -> (LoweredAtomicStorezero ptr mem)
-(LoweredAtomicAdd ptr (MOVWconst [c]) mem) && is16Bit(c)-> (LoweredAtomicAddconst [c] ptr mem)
+(LoweredAtomicAdd ptr (MOVWconst [c]) mem) && is16Bit(c) -> (LoweredAtomicAddconst [c] ptr mem)
 
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
index 42b0dc51bb3e743fa16fc4e7f2b445d78277132b..6dd5461f1fed1ddbd59bd48889ff843b1e0302eb 100644 (file)
 (MOVVreg x) && x.Uses == 1 -> (MOVVnop x)
 
 // fold constant into arithmetic ops
-(ADDV (MOVVconst [c]) x) && is32Bit(c) -> (ADDVconst [c] x)
 (ADDV x (MOVVconst [c])) && is32Bit(c) -> (ADDVconst [c] x)
 (SUBV x (MOVVconst [c])) && is32Bit(c) -> (SUBVconst [c] x)
-(AND (MOVVconst [c]) x) && is32Bit(c) -> (ANDconst [c] x)
 (AND x (MOVVconst [c])) && is32Bit(c) -> (ANDconst [c] x)
-(OR  (MOVVconst [c]) x) && is32Bit(c) -> (ORconst  [c] x)
 (OR  x (MOVVconst [c])) && is32Bit(c) -> (ORconst  [c] x)
-(XOR (MOVVconst [c]) x) && is32Bit(c) -> (XORconst [c] x)
 (XOR x (MOVVconst [c])) && is32Bit(c) -> (XORconst [c] x)
-(NOR (MOVVconst [c]) x) && is32Bit(c) -> (NORconst [c] x)
 (NOR x (MOVVconst [c])) && is32Bit(c) -> (NORconst [c] x)
 
 (SLLV _ (MOVVconst [c])) && uint64(c)>=64 -> (MOVVconst [0])
 
 // generic simplifications
 (ADDV x (NEGV y)) -> (SUBV x y)
-(ADDV (NEGV y) x) -> (SUBV x y)
 (SUBV x x) -> (MOVVconst [0])
 (SUBV (MOVVconst [0]) x) -> (NEGV x)
 (AND x x) -> x
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index a44e50629de572021ff6cf5105b2ff0ee6aef97c..36018209932803e0e76a61d21d8052a8508be77a 100644 (file)
 (Move [8] {t} dst src mem) && t.(Type).Alignment()%4 == 0 ->
        (MOVWstore [4] dst (MOVWZload [4] src mem)
                (MOVWstore dst (MOVWZload src mem) mem))
-(Move [8] {t} dst src mem) && t.(Type).Alignment()%2 == 0->
+(Move [8] {t} dst src mem) && t.(Type).Alignment()%2 == 0 ->
        (MOVHstore [6] dst (MOVHZload [6] src mem)
                (MOVHstore [4] dst (MOVHZload [4] src mem)
                        (MOVHstore [2] dst (MOVHZload [2] src mem)
 (AND x (MOVDconst [c])) && isU16Bit(c) -> (ANDconst [c] x)
 (XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
 (OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
-(AND (MOVDconst [c]) x) && isU16Bit(c) -> (ANDconst [c] x)
-(XOR (MOVDconst [c]) x) && isU32Bit(c) -> (XORconst [c] x)
-(OR (MOVDconst [c]) x) && isU32Bit(c) -> (ORconst [c] x)
 
 // Simplify consts
 (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
 
 // Arithmetic constant ops
 
-(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
 (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
 (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
 (ADDconst [0] x) -> x
 (AND x:(MOVBZload _ _) (MOVDconst [c])) -> (ANDconst [c&0xFF] x)
 
 // floating-point fused multiply-add/sub
-(FADD z (FMUL x y)) -> (FMADD x y z)
 (FADD (FMUL x y) z) -> (FMADD x y z)
 (FSUB (FMUL x y) z) -> (FMSUB x y z)
-(FADDS z (FMULS x y)) -> (FMADDS x y z)
 (FADDS (FMULS x y) z) -> (FMADDS x y z)
 (FSUBS (FMULS x y) z) -> (FMSUBS x y z)
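[Editor's sketch] With FADD and FADDS marked commutative, one source rule per shape suffices. At the Go level the shape is simply a multiply feeding an add or subtract directly; a minimal sketch (fma64 is an illustrative name):

    package example

    // fma64 has the multiply-feeds-add shape the FMADD rule matches; on
    // targets carrying these rules it can be fused into a single instruction.
    func fma64(x, y, z float64) float64 {
            return x*y + z
    }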
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 387584dbdaaa14f00002da5c4531ae2b766836d0..93d2981be3337e20b2cf7a7891ffa560d823bede 100644 (file)
@@ -224,7 +224,7 @@ func init() {
                {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
                {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
                {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
-               {name: "NOR", argLength: 2, reg: gp21, asm: "NOR"},                                  // ^(arg0|arg1)
+               {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},               // ^(arg0|arg1)
                {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
                {name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
                {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index ef96cc0d8210f492fc87886365fc649bca90ec84..3c62656ba6f3f46b5c0398d0d53ebb8039a74254 100644 (file)
 
 // Fold constants into instructions.
 (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
-(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
 (ADDW x (MOVDconst [c])) -> (ADDWconst [c] x)
-(ADDW (MOVDconst [c]) x) -> (ADDWconst [c] x)
 
 (SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
 (SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
 (SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [c]))
 
 (MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
-(MULLD (MOVDconst [c]) x) && is32Bit(c) -> (MULLDconst [c] x)
 (MULLW x (MOVDconst [c])) -> (MULLWconst [c] x)
-(MULLW (MOVDconst [c]) x) -> (MULLWconst [c] x)
 
 // NILF instructions leave the high 32 bits unchanged which is
 // equivalent to the leftmost 32 bits being set.
 // TODO(mundaym): modify the assembler to accept 64-bit values
 // and use isU32Bit(^c).
 (AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
-(AND (MOVDconst [c]) x) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
 (ANDW x (MOVDconst [c])) -> (ANDWconst [c] x)
-(ANDW (MOVDconst [c]) x) -> (ANDWconst [c] x)
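[Editor's sketch] One way to see why the c < 0 guard is sufficient: for a negative 32-bit constant, the sign-extended 64-bit value has all ones in its upper half, so AND-ing only the low word (what NILF does) gives the same result as the full 64-bit AND. A small self-contained check of that arithmetic:

    package main

    import "fmt"

    func main() {
            c := int64(int32(-16)) // sign-extends to 0xFFFFFFFFFFFFFFF0
            x := int64(0x123456789ABCDEF0)
            full := x & c
            // NILF-style: AND only the low 32 bits, leave the high 32 untouched.
            nilf := (x & 0xFFFFFFFF & c) | (x &^ 0xFFFFFFFF)
            fmt.Println(full == nilf) // true
    }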
 
 (ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
 (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)
 
 (OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
-(OR (MOVDconst [c]) x) && isU32Bit(c) -> (ORconst [c] x)
 (ORW x (MOVDconst [c])) -> (ORWconst [c] x)
-(ORW (MOVDconst [c]) x) -> (ORWconst [c] x)
 
 (XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
-(XOR (MOVDconst [c]) x) && isU32Bit(c) -> (XORconst [c] x)
 (XORW x (MOVDconst [c])) -> (XORWconst [c] x)
-(XORW (MOVDconst [c]) x) -> (XORWconst [c] x)
 
 (SLD x (MOVDconst [c])) -> (SLDconst [c&63] x)
 (SLW x (MOVDconst [c])) -> (SLWconst [c&63] x)
 (SRD x (ANDconst [63] y)) -> (SRD x y)
 
 // Rotate generation
-(ADD (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-( OR (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-(XOR (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-(ADD (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-( OR (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-(XOR (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-
-(ADDW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-( ORW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-(XORW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-(ADDW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
-( ORW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
-(XORW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
+(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+
+(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
+( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
+(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
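[Editor's sketch] Writing the second shift amount as a separate variable with a d == 64-c (or 32-c) condition keeps the rule in a form the generator can commute automatically. The Go-level pattern is the usual constant-rotate idiom (rotl13 is an illustrative name):

    package example

    // rotl13 rotates left by 13: a left shift OR'd with the complementary
    // right shift of the same value, which these rules turn into RLLGconst.
    func rotl13(x uint64) uint64 {
            return x<<13 | x>>51
    }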
 
 (CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
 (CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
 (CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(uint32(c))]))
 
 // Using MOV{W,H,B}Zreg instead of AND is cheaper.
-(AND (MOVDconst [0xFF]) x) -> (MOVBZreg x)
 (AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
-(AND (MOVDconst [0xFFFF]) x) -> (MOVHZreg x)
 (AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
-(AND (MOVDconst [0xFFFFFFFF]) x) -> (MOVWZreg x)
 (AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
 (ANDWconst [0xFF] x) -> (MOVBZreg x)
 (ANDWconst [0xFFFF] x) -> (MOVHZreg x)
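[Editor's sketch] The masks here are exactly what the zero-extending moves already produce, so the move is the cheaper encoding. For instance (low8 is just an example name):

    package example

    // low8 masks to the low byte; per the rules above, s390x selects MOVBZreg
    // for this rather than an AND with 0xFF.
    func low8(x uint64) uint64 {
            return x & 0xFF
    }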
 (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
 (ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
 (ADD x (MOVDaddr [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (MOVDaddridx [c] {s} x y)
-(ADD (MOVDaddr [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (MOVDaddridx [c] {s} x y)
 
 // fold ADDconst into MOVDaddrx
 (ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
 (NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
 
 // fused multiply-add
-(FADD x (FMUL y z)) -> (FMADD x y z)
-(FADDS x (FMULS y z)) -> (FMADDS x y z)
 (FADD (FMUL y z) x) -> (FMADD x y z)
 (FADDS (FMULS y z) x) -> (FMADDS x y z)
 (FSUB (FMUL y z) x) -> (FMSUB x y z)
   && clobber(x)
   -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORW x:(SLWconst _) y) && y.Op != OpS390XSLWconst -> (ORW y x)
-(OR  x:(SLDconst _) y) && y.Op != OpS390XSLDconst -> (OR  y x)
-
 // Combining byte loads into larger (unaligned) loads.
 
-// Little endian loads.
+// Big-endian loads
 
-// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
-(ORW                 x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
+(ORW                 x1:(MOVBZload [i1] {s} p mem)
+    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
+  && i1 == i0+1
   && p.Op != OpSB
   && x0.Uses == 1
   && x1.Uses == 1
-  && s0.Uses == 1
+  && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
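The rule above merges two adjacent zero-extended byte loads into one 16-bit load; the wider patterns below build 32-bit and 64-bit loads the same way, a pair at a time, in place of the removed 8-way OR chains. A small illustrative Go check of the 16-bit case (hypothetical byte values, not from the patch):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	b := []byte{0x12, 0x34}
	// (ORW x1:(MOVBZload [i0+1]) (SLWconst [8] x0:(MOVBZload [i0])))
	pattern := uint16(b[1]) | uint16(b[0])<<8
	fmt.Println(pattern == binary.BigEndian.Uint16(b)) // true: a single MOVHZload [i0] yields the same value
}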
 
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
-(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRload [i] {s} p mem))
-    s0:(SLWconst [16] x1:(MOVBZload [i+2] {s} p mem)))
-    s1:(SLWconst [24] x2:(MOVBZload [i+3] {s} p mem)))
+(OR                  x1:(MOVBZload [i1] {s} p mem)
+    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
+  && i1 == i0+1
   && p.Op != OpSB
-  && z0.Uses == 1
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+
+(ORW                  x1:(MOVHZload [i1] {s} p mem)
+    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+
+(OR                   x1:(MOVHZload [i1] {s} p mem)
+    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+
+(OR                   x1:(MOVWZload [i1] {s} p mem)
+    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
+  && i1 == i0+4
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+
+(ORW
+    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
+    or:(ORW
+        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(z0)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWBRload [i] {s} p mem)
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLDconst [8]  x1:(MOVBZload [i+1] {s} p mem)))
-    s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem)))
-    s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem)))
-    s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem)))
-    s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem)))
-    s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem)))
-    s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
-  && p.Op != OpSB
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
-
-// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
-(ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx <v.Type> [i] {s} p idx mem))
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+
+// Big-endian indexed loads
+
+(ORW                 x1:(MOVBZloadidx [i1] {s} p idx mem)
+    sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
 
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
-(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRloadidx [i] {s} p idx mem))
-    s0:(SLWconst [16] x1:(MOVBZloadidx [i+2] {s} p idx mem)))
-    s1:(SLWconst [24] x2:(MOVBZloadidx [i+3] {s} p idx mem)))
-  && z0.Uses == 1
+(OR                  x1:(MOVBZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+
+(ORW                  x1:(MOVHZloadidx [i1] {s} p idx mem)
+    sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+
+(OR                   x1:(MOVHZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+
+(OR                   x1:(MOVWZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+4
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+
+(ORW
+    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+    or:(ORW
+        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(z0)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLDconst [8]  x1:(MOVBZloadidx [i+1] {s} p idx mem)))
-    s1:(SLDconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))
-    s2:(SLDconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))
-    s3:(SLDconst [32] x4:(MOVBZloadidx [i+4] {s} p idx mem)))
-    s4:(SLDconst [40] x5:(MOVBZloadidx [i+5] {s} p idx mem)))
-    s5:(SLDconst [48] x6:(MOVBZloadidx [i+6] {s} p idx mem)))
-    s6:(SLDconst [56] x7:(MOVBZloadidx [i+7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRloadidx <v.Type> [i] {s} p idx mem)
-
-// Big endian loads.
-
-// b[1] | b[0]<<8 -> load 16-bit
-(ORW                  x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLWconst [8] x1:(MOVBZload [i-1] {s} p mem)))
-  && p.Op != OpSB
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZload [i-1] {s} p mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
 
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
-(ORW o0:(ORW x0:(MOVHZload [i] {s} p mem)
-    s0:(SLWconst [16] x1:(MOVBZload [i-1] {s} p mem)))
-    s1:(SLWconst [24] x2:(MOVBZload [i-2] {s} p mem)))
-  && p.Op != OpSB
+// Little-endian loads
+
+(ORW                 x0:(MOVBZload [i0] {s} p mem)
+    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
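The little-endian patterns are the mirror image: b[i0] | b[i0+1]<<8 is a little-endian value, so on big-endian S390X it maps to the byte-reversed MOVHBRload (and MOVWBRload/MOVDBRload for the wider cases). An illustrative Go check, not part of the patch:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	b := []byte{0x12, 0x34}
	// (ORW x0:(MOVBZload [i0]) (SLWconst [8] x1:(MOVBZload [i0+1])))
	pattern := uint16(b[0]) | uint16(b[1])<<8
	fmt.Println(pattern == binary.LittleEndian.Uint16(b)) // true: matches the byte-reversed 16-bit load
}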
+
+(OR                  x0:(MOVBZload [i0] {s} p mem)
+    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+
+(ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
+    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+
+(OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
+    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+
+(OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
+    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+
+(ORW
+    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
+    or:(ORW
+        s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZload [i-2] {s} p mem)
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLDconst [8]  x1:(MOVBZload [i-1] {s} p mem)))
-    s1:(SLDconst [16] x2:(MOVBZload [i-2] {s} p mem)))
-    s2:(SLDconst [24] x3:(MOVBZload [i-3] {s} p mem)))
-    s3:(SLDconst [32] x4:(MOVBZload [i-4] {s} p mem)))
-    s4:(SLDconst [40] x5:(MOVBZload [i-5] {s} p mem)))
-    s5:(SLDconst [48] x6:(MOVBZload [i-6] {s} p mem)))
-    s6:(SLDconst [56] x7:(MOVBZload [i-7] {s} p mem)))
-  && p.Op != OpSB
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+
+(OR
+    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
+    or:(OR
+        s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload [i-7] {s} p mem)
-
-// b[1] | b[0]<<8 -> load 16-bit
-(ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+
+(OR
+    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
+    or:(OR
+        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZloadidx <v.Type> [i-1] {s} p idx mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+
+// Little-endian indexed loads
+
+(ORW                 x0:(MOVBZloadidx [i0] {s} p idx mem)
+    sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
 
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
-(ORW o0:(ORW x0:(MOVHZloadidx [i] {s} p idx mem)
-    s0:(SLWconst [16] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-    s1:(SLWconst [24] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
+(OR                  x0:(MOVBZloadidx [i0] {s} p idx mem)
+    sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+
+(ORW                  r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
+    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+
+(OR                   r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
+    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+
+(OR                   r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
+    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+
+(ORW
+    s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+    or:(ORW
+        s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(s0)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+
+(OR
+    s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+    or:(OR
+        s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZloadidx <v.Type> [i-2] {s} p idx mem)
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLDconst [8]  x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-    s1:(SLDconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
-    s2:(SLDconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))
-    s3:(SLDconst [32] x4:(MOVBZloadidx [i-4] {s} p idx mem)))
-    s4:(SLDconst [40] x5:(MOVBZloadidx [i-5] {s} p idx mem)))
-    s5:(SLDconst [48] x6:(MOVBZloadidx [i-6] {s} p idx mem)))
-    s6:(SLDconst [56] x7:(MOVBZloadidx [i-7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+
+(OR
+    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
+    or:(OR
+        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
+  && clobber(r0)
+  && clobber(r1)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <v.Type> [i-7] {s} p idx mem)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
 
 // Combine stores into store multiples.
 // 32-bit
index 7765d9792fda3817188febceb97a9f3c2e23efc4..07e7dd24c82dbc8d0c55e17f842296fea1ae9da4 100644 (file)
@@ -216,8 +216,8 @@ func init() {
                {name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
                {name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
 
-               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", typ: "Int64", resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
-               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", typ: "Int64", resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
+               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
 
                {name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
                {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
@@ -265,24 +265,24 @@ func init() {
                {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"}, // arg0 compare to arg1, f32
                {name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"}, // arg0 compare to arg1, f64
 
-               {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                    // arg0 << arg1, shift amount is mod 64
-               {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                    // arg0 << arg1, shift amount is mod 32
-               {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxint, shift amount 0-63
-               {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int32"}, // arg0 << auxint, shift amount 0-31
+               {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                   // arg0 << arg1, shift amount is mod 64
+               {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                   // arg0 << arg1, shift amount is mod 32
+               {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int8"}, // arg0 << auxint, shift amount 0-63
+               {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int8"}, // arg0 << auxint, shift amount 0-31
 
-               {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                    // unsigned arg0 >> arg1, shift amount is mod 64
-               {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                    // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"}, // unsigned arg0 >> auxint, shift amount 0-63
-               {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int32"}, // unsigned arg0 >> auxint, shift amount 0-31
+               {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                   // unsigned arg0 >> arg1, shift amount is mod 64
+               {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                   // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-63
+               {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-31
 
                // Arithmetic shifts clobber flags.
-               {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 64
-               {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-               {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int32", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
+               {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                   // signed arg0 >> arg1, shift amount is mod 64
+               {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                   // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
+               {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
 
-               {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int64"}, // arg0 rotate left auxint, rotate amount 0-63
-               {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int32"},   // arg0 rotate left auxint, rotate amount 0-31
+               {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-63
+               {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"},   // arg0 rotate left auxint, rotate amount 0-31
 
                // unary ops
                {name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true},   // -arg0
@@ -364,20 +364,20 @@ func init() {
 
                // indexed loads/stores
                // TODO(mundaym): add sign-extended indexed loads
-               {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Read"},        // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
-               {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
-               {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", clobberFlags: true, symEffect: "Read"},   // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", clobberFlags: true, symEffect: "Read"},  // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", clobberFlags: true, symEffect: "Read"},  // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", clobberFlags: true, symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", clobberFlags: true, symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", clobberFlags: true, symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", clobberFlags: true, symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
 
                // For storeconst ops, the AuxInt field encodes both
                // the value to store and an address offset of the store.
index 86d0fcab324be5aab06a41588caf10b9555bd750..95d5c7766e91bd1b753dd6ab98477d9a389c3b6d 100644 (file)
 (Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
 (Neq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x)) -> (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
 
-// canonicalize: swap arguments for commutative operations when one argument is a constant.
-(Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x)
-(Eq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Eq32 (Const32 <t> [c]) x)
-(Eq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Eq16 (Const16 <t> [c]) x)
-(Eq8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Eq8  (Const8  <t> [c]) x)
-
-(Neq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Neq64 (Const64 <t> [c]) x)
-(Neq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Neq32 (Const32 <t> [c]) x)
-(Neq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Neq16 (Const16 <t> [c]) x)
-(Neq8  x (Const8 <t>  [c])) && x.Op != OpConst8  -> (Neq8  (Const8  <t> [c]) x)
-
-// AddPtr is not canonicalized because nilcheck ptr checks the first argument to be non-nil.
-(Add64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [c]) x)
-(Add32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [c]) x)
-(Add16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [c]) x)
-(Add8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Add8  (Const8  <t> [c]) x)
-
-(Mul64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Mul64 (Const64 <t> [c]) x)
-(Mul32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Mul32 (Const32 <t> [c]) x)
-(Mul16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Mul16 (Const16 <t> [c]) x)
-(Mul8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Mul8  (Const8  <t> [c]) x)
-
+// Canonicalize x-const to x+(-const)
 (Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [-c]) x)
 (Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [int64(int32(-c))]) x)
 (Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [int64(int16(-c))]) x)
 (Sub8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Add8  (Const8  <t> [int64(int8(-c))]) x)
 
-(And64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (And64 (Const64 <t> [c]) x)
-(And32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (And32 (Const32 <t> [c]) x)
-(And16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (And16 (Const16 <t> [c]) x)
-(And8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (And8  (Const8  <t> [c]) x)
-
-(Or64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Or64 (Const64 <t> [c]) x)
-(Or32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Or32 (Const32 <t> [c]) x)
-(Or16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Or16 (Const16 <t> [c]) x)
-(Or8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Or8  (Const8  <t> [c]) x)
-
-(Xor64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Xor64 (Const64 <t> [c]) x)
-(Xor32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Xor32 (Const32 <t> [c]) x)
-(Xor16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Xor16 (Const16 <t> [c]) x)
-(Xor8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Xor8  (Const8  <t> [c]) x)
-
 // fold negation into comparison operators
 (Not (Eq64 x y)) -> (Neq64 x y)
 (Not (Eq32 x y)) -> (Neq32 x y)
 (And32 x (And32 x y)) -> (And32 x y)
 (And16 x (And16 x y)) -> (And16 x y)
 (And8  x (And8  x y)) -> (And8  x y)
-(And64 x (And64 y x)) -> (And64 x y)
-(And32 x (And32 y x)) -> (And32 x y)
-(And16 x (And16 y x)) -> (And16 x y)
-(And8  x (And8  y x)) -> (And8  x y)
-(And64 (And64 x y) x) -> (And64 x y)
-(And32 (And32 x y) x) -> (And32 x y)
-(And16 (And16 x y) x) -> (And16 x y)
-(And8  (And8  x y) x) -> (And8  x y)
-(And64 (And64 x y) y) -> (And64 x y)
-(And32 (And32 x y) y) -> (And32 x y)
-(And16 (And16 x y) y) -> (And16 x y)
-(And8  (And8  x y) y) -> (And8  x y)
 (Or64 x (Or64 x y)) -> (Or64 x y)
 (Or32 x (Or32 x y)) -> (Or32 x y)
 (Or16 x (Or16 x y)) -> (Or16 x y)
 (Or8  x (Or8  x y)) -> (Or8  x y)
-(Or64 x (Or64 y x)) -> (Or64 x y)
-(Or32 x (Or32 y x)) -> (Or32 x y)
-(Or16 x (Or16 y x)) -> (Or16 x y)
-(Or8  x (Or8  y x)) -> (Or8  x y)
-(Or64 (Or64 x y) x) -> (Or64 x y)
-(Or32 (Or32 x y) x) -> (Or32 x y)
-(Or16 (Or16 x y) x) -> (Or16 x y)
-(Or8  (Or8  x y) x) -> (Or8  x y)
-(Or64 (Or64 x y) y) -> (Or64 x y)
-(Or32 (Or32 x y) y) -> (Or32 x y)
-(Or16 (Or16 x y) y) -> (Or16 x y)
-(Or8  (Or8  x y) y) -> (Or8  x y)
 (Xor64 x (Xor64 x y)) -> y
 (Xor32 x (Xor32 x y)) -> y
 (Xor16 x (Xor16 x y)) -> y
 (Xor8  x (Xor8  x y)) -> y
-(Xor64 x (Xor64 y x)) -> y
-(Xor32 x (Xor32 y x)) -> y
-(Xor16 x (Xor16 y x)) -> y
-(Xor8  x (Xor8  y x)) -> y
-(Xor64 (Xor64 x y) x) -> y
-(Xor32 (Xor32 x y) x) -> y
-(Xor16 (Xor16 x y) x) -> y
-(Xor8  (Xor8  x y) x) -> y
-(Xor64 (Xor64 x y) y) -> x
-(Xor32 (Xor32 x y) y) -> x
-(Xor16 (Xor16 x y) y) -> x
-(Xor8  (Xor8  x y) y) -> x
 
 (Trunc64to8  (And64 (Const64 [y]) x)) && y&0xFF == 0xFF -> (Trunc64to8 x)
 (Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc64to16 x)
 
 // user nil checks
 (NeqPtr p (ConstNil)) -> (IsNonNil p)
-(NeqPtr (ConstNil) p) -> (IsNonNil p)
 (EqPtr p (ConstNil)) -> (Not (IsNonNil p))
-(EqPtr (ConstNil) p) -> (Not (IsNonNil p))
 (IsNonNil (ConstNil)) -> (ConstBool [0])
 
 // slice and interface comparisons
 
 // Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
 (Convert (Add64 (Convert ptr mem) off) mem) -> (Add64 ptr off)
-(Convert (Add64 off (Convert ptr mem)) mem) -> (Add64 ptr off)
 (Convert (Convert ptr mem) mem) -> ptr
 
 // Decompose compound argument values
 // Reassociate expressions involving
 // constants such that constants come first,
 // exposing obvious constant-folding opportunities.
-// First, re-write (op x (op y z)) to (op (op y z) x) if
-// the op is commutative, to reduce the number of subsequent
-// matching rules for folding. Then, reassociate
-// (op (op y C) x) to (op C (op x y)) or similar, where C
+// Reassociate (op (op y C) x) to (op C (op x y)) or similar, where C
 // is constant, which pushes constants to the outside
 // of the expression. At that point, any constant-folding
 // opportunities should be obvious.
 
-(Add64 x l:(Add64 _ _)) && (x.Op != OpAdd64 && x.Op != OpConst64) -> (Add64 l x)
-(Add32 x l:(Add32 _ _)) && (x.Op != OpAdd32 && x.Op != OpConst32) -> (Add32 l x)
-(Add16 x l:(Add16 _ _)) && (x.Op != OpAdd16 && x.Op != OpConst16) -> (Add16 l x)
-(Add8  x l:(Add8  _ _)) && (x.Op != OpAdd8  && x.Op != OpConst8)  -> (Add8  l x)
-(And64 x l:(And64 _ _)) && (x.Op != OpAnd64 && x.Op != OpConst64) -> (And64 l x)
-(And32 x l:(And32 _ _)) && (x.Op != OpAnd32 && x.Op != OpConst32) -> (And32 l x)
-(And16 x l:(And16 _ _)) && (x.Op != OpAnd16 && x.Op != OpConst16) -> (And16 l x)
-(And8  x l:(And8  _ _)) && (x.Op != OpAnd8  && x.Op != OpConst8)  -> (And8  l x)
-(Or64 x l:(Or64 _ _)) && (x.Op != OpOr64 && x.Op != OpConst64) -> (Or64 l x)
-(Or32 x l:(Or32 _ _)) && (x.Op != OpOr32 && x.Op != OpConst32) -> (Or32 l x)
-(Or16 x l:(Or16 _ _)) && (x.Op != OpOr16 && x.Op != OpConst16) -> (Or16 l x)
-(Or8  x l:(Or8  _ _)) && (x.Op != OpOr8  && x.Op != OpConst8)  -> (Or8  l x)
-(Xor64 x l:(Xor64 _ _)) && (x.Op != OpXor64 && x.Op != OpConst64) -> (Xor64 l x)
-(Xor32 x l:(Xor32 _ _)) && (x.Op != OpXor32 && x.Op != OpConst32) -> (Xor32 l x)
-(Xor16 x l:(Xor16 _ _)) && (x.Op != OpXor16 && x.Op != OpConst16) -> (Xor16 l x)
-(Xor8  x l:(Xor8  _ _)) && (x.Op != OpXor8  && x.Op != OpConst8)  -> (Xor8  l x)
-(Mul64 x l:(Mul64 _ _)) && (x.Op != OpMul64 && x.Op != OpConst64) -> (Mul64 l x)
-(Mul32 x l:(Mul32 _ _)) && (x.Op != OpMul32 && x.Op != OpConst32) -> (Mul32 l x)
-(Mul16 x l:(Mul16 _ _)) && (x.Op != OpMul16 && x.Op != OpConst16) -> (Mul16 l x)
-(Mul8  x l:(Mul8  _ _)) && (x.Op != OpMul8  && x.Op != OpConst8)  -> (Mul8  l x)
-
 // x + (C + z) -> C + (x + z)
 (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 <t> z x))
 (Add32 (Add32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Add32 <t> z x))
 
 // floating point optimizations
 (Add32F x (Const32F [0])) -> x
-(Add32F (Const32F [0]) x) -> x
 (Add64F x (Const64F [0])) -> x
-(Add64F (Const64F [0]) x) -> x
 (Sub32F x (Const32F [0])) -> x
 (Sub64F x (Const64F [0])) -> x
 (Mul32F x (Const32F [f2i(1)])) -> x
-(Mul32F (Const32F [f2i(1)]) x) -> x
 (Mul64F x (Const64F [f2i(1)])) -> x
-(Mul64F (Const64F [f2i(1)]) x) -> x
 (Mul32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Mul32F (Const32F [f2i(-1)]) x) -> (Neg32F x)
 (Mul64F x (Const64F [f2i(-1)])) -> (Neg64F x)
-(Mul64F (Const64F [f2i(-1)]) x) -> (Neg64F x)
 (Div32F x (Const32F [f2i(1)])) -> x
 (Div64F x (Const64F [f2i(1)])) -> x
 (Div32F x (Const32F [f2i(-1)])) -> (Neg32F x)
index a5ac62829ee09e5fb28ebecdf0959858ec758391..7991f32679c35512b42d1c9763a387c57672b7a9 100644 (file)
@@ -28,8 +28,8 @@ var genericOps = []opData{
        {name: "Add32", argLength: 2, commutative: true},
        {name: "Add64", argLength: 2, commutative: true},
        {name: "AddPtr", argLength: 2}, // For address calculations.  arg0 is a pointer and arg1 is an int.
-       {name: "Add32F", argLength: 2},
-       {name: "Add64F", argLength: 2},
+       {name: "Add32F", argLength: 2, commutative: true},
+       {name: "Add64F", argLength: 2, commutative: true},
 
        {name: "Sub8", argLength: 2}, // arg0 - arg1
        {name: "Sub16", argLength: 2},
@@ -43,24 +43,25 @@ var genericOps = []opData{
        {name: "Mul16", argLength: 2, commutative: true},
        {name: "Mul32", argLength: 2, commutative: true},
        {name: "Mul64", argLength: 2, commutative: true},
-       {name: "Mul32F", argLength: 2},
-       {name: "Mul64F", argLength: 2},
+       {name: "Mul32F", argLength: 2, commutative: true},
+       {name: "Mul64F", argLength: 2, commutative: true},
 
        {name: "Div32F", argLength: 2}, // arg0 / arg1
        {name: "Div64F", argLength: 2},
 
-       {name: "Hmul32", argLength: 2},
-       {name: "Hmul32u", argLength: 2},
-       {name: "Hmul64", argLength: 2},
-       {name: "Hmul64u", argLength: 2},
+       {name: "Hmul32", argLength: 2, commutative: true},
+       {name: "Hmul32u", argLength: 2, commutative: true},
+       {name: "Hmul64", argLength: 2, commutative: true},
+       {name: "Hmul64u", argLength: 2, commutative: true},
 
-       {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
-       {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)"}, // arg0 * arg1, returns (hi, lo)
+       {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)", commutative: true}, // arg0 * arg1, returns (hi, lo)
+       {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)", commutative: true}, // arg0 * arg1, returns (hi, lo)
 
        // Weird special instructions for use in the strength reduction of divides.
        // These ops compute unsigned (arg0 + arg1) / 2, correct to all
        // 32/64 bits, even when the intermediate result of the add has 33/65 bits.
        // These ops can assume arg0 >= arg1.
+       // Note: these ops aren't commutative!
        {name: "Avg32u", argLength: 2, typ: "UInt32"}, // 32-bit platforms only
        {name: "Avg64u", argLength: 2, typ: "UInt64"}, // 64-bit platforms only
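The note matters because a lowering of these ops may exploit the arg0 >= arg1 guarantee, for example computing arg1 + (arg0-arg1)/2 to avoid the 33/65-bit intermediate sum; swapping the arguments would then give a wrong result. A hypothetical Go sketch of why the swap is unsafe (illustrative only):

package main

import "fmt"

// avg64u mirrors a lowering that assumes x >= y.
func avg64u(x, y uint64) uint64 {
	return y + (x-y)/2
}

func main() {
	x, y := uint64(10), uint64(4)
	fmt.Println(avg64u(x, y)) // 7, the correct average
	fmt.Println(avg64u(y, x)) // wraps around, not the average
}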
 
@@ -159,8 +160,8 @@ var genericOps = []opData{
        {name: "EqPtr", argLength: 2, commutative: true, typ: "Bool"},
        {name: "EqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
        {name: "EqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-       {name: "Eq32F", argLength: 2, typ: "Bool"},
-       {name: "Eq64F", argLength: 2, typ: "Bool"},
+       {name: "Eq32F", argLength: 2, commutative: true, typ: "Bool"},
+       {name: "Eq64F", argLength: 2, commutative: true, typ: "Bool"},
 
        {name: "Neq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1
        {name: "Neq16", argLength: 2, commutative: true, typ: "Bool"},
@@ -169,8 +170,8 @@ var genericOps = []opData{
        {name: "NeqPtr", argLength: 2, commutative: true, typ: "Bool"},
        {name: "NeqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
        {name: "NeqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-       {name: "Neq32F", argLength: 2, typ: "Bool"},
-       {name: "Neq64F", argLength: 2},
+       {name: "Neq32F", argLength: 2, commutative: true, typ: "Bool"},
+       {name: "Neq64F", argLength: 2, commutative: true, typ: "Bool"},
 
        {name: "Less8", argLength: 2, typ: "Bool"},  // arg0 < arg1, signed
        {name: "Less8U", argLength: 2, typ: "Bool"}, // arg0 < arg1, unsigned
index beabca97d0ee1fa677359d56ce60ec947fc8a307..6924bbca951c39418827a5c23afcb375be43f2a0 100644 (file)
@@ -30,7 +30,7 @@ import (
 //  sexpr [&& extra conditions] -> [@block] sexpr
 //
 // sexpr are s-expressions (lisp-like parenthesized groupings)
-// sexpr ::= (opcode sexpr*)
+// sexpr ::= [variable:](opcode sexpr*)
 //         | variable
 //         | <type>
 //         | [auxint]
@@ -39,7 +39,7 @@ import (
 // aux      ::= variable | {code}
 // type     ::= variable | {code}
 // variable ::= some token
-// opcode   ::= one of the opcodes from ../op.go (without the Op prefix)
+// opcode   ::= one of the opcodes from the *Ops.go files
 
 // extra conditions is just a chunk of Go that evaluates to a boolean. It may use
 // variables declared in the matching sexpr. The variable "v" is predefined to be
@@ -119,15 +119,17 @@ func genRules(arch arch) {
                }
 
                loc := fmt.Sprintf("%s.rules:%d", arch.name, ruleLineno)
-               r := Rule{rule: rule, loc: loc}
-               if rawop := strings.Split(rule, " ")[0][1:]; isBlock(rawop, arch) {
-                       blockrules[rawop] = append(blockrules[rawop], r)
-               } else {
-                       // Do fancier value op matching.
-                       match, _, _ := r.parse()
-                       op, oparch, _, _, _, _ := parseValue(match, arch, loc)
-                       opname := fmt.Sprintf("Op%s%s", oparch, op.name)
-                       oprules[opname] = append(oprules[opname], r)
+               for _, crule := range commute(rule, arch) {
+                       r := Rule{rule: crule, loc: loc}
+                       if rawop := strings.Split(crule, " ")[0][1:]; isBlock(rawop, arch) {
+                               blockrules[rawop] = append(blockrules[rawop], r)
+                       } else {
+                               // Do fancier value op matching.
+                               match, _, _ := r.parse()
+                               op, oparch, _, _, _, _ := parseValue(match, arch, loc)
+                               opname := fmt.Sprintf("Op%s%s", oparch, op.name)
+                               oprules[opname] = append(oprules[opname], r)
+                       }
                }
                rule = ""
                ruleLineno = 0
@@ -752,3 +754,169 @@ func isVariable(s string) bool {
        }
        return b
 }
+
+// commute returns all equivalent rules to r after applying all possible
+// argument swaps to the commutable ops in r.
+// Potentially exponential, be careful.
+func commute(r string, arch arch) []string {
+       match, cond, result := Rule{rule: r}.parse()
+       a := commute1(match, varCount(match), arch)
+       for i, m := range a {
+               if cond != "" {
+                       m += " && " + cond
+               }
+               m += " -> " + result
+               a[i] = m
+       }
+       if len(a) == 1 && normalizeWhitespace(r) != normalizeWhitespace(a[0]) {
+               fmt.Println(normalizeWhitespace(r))
+               fmt.Println(normalizeWhitespace(a[0]))
+               panic("commute() is not the identity for noncommuting rule")
+       }
+       if false && len(a) > 1 {
+               fmt.Println(r)
+               for _, x := range a {
+                       fmt.Println("  " + x)
+               }
+       }
+       return a
+}
+
+func commute1(m string, cnt map[string]int, arch arch) []string {
+       if m[0] == '<' || m[0] == '[' || m[0] == '{' || isVariable(m) {
+               return []string{m}
+       }
+       // Split up input.
+       var prefix string
+       colon := strings.Index(m, ":")
+       if colon >= 0 && isVariable(m[:colon]) {
+               prefix = m[:colon+1]
+               m = m[colon+1:]
+       }
+       if m[0] != '(' || m[len(m)-1] != ')' {
+               panic("non-compound expr in commute1: " + m)
+       }
+       s := split(m[1 : len(m)-1])
+       op := s[0]
+
+       // Figure out if the op is commutative or not.
+       commutative := false
+       for _, x := range genericOps {
+               if op == x.name {
+                       if x.commutative {
+                               commutative = true
+                       }
+                       break
+               }
+       }
+       if arch.name != "generic" {
+               for _, x := range arch.ops {
+                       if op == x.name {
+                               if x.commutative {
+                                       commutative = true
+                               }
+                               break
+                       }
+               }
+       }
+       var idx0, idx1 int
+       if commutative {
+               // Find indexes of two args we can swap.
+               for i, arg := range s {
+                       if i == 0 || arg[0] == '<' || arg[0] == '[' || arg[0] == '{' {
+                               continue
+                       }
+                       if idx0 == 0 {
+                               idx0 = i
+                               continue
+                       }
+                       if idx1 == 0 {
+                               idx1 = i
+                               break
+                       }
+               }
+               if idx1 == 0 {
+                       panic("couldn't find first two args of commutative op " + s[0])
+               }
+               if cnt[s[idx0]] == 1 && cnt[s[idx1]] == 1 || s[idx0] == s[idx1] && cnt[s[idx0]] == 2 {
+                       // When we have (Add x y) with no other uses of x and y in the matching rule,
+                       // then we can skip the commutative match (Add y x).
+                       commutative = false
+               }
+       }
+
+       // Recursively commute arguments.
+       a := make([][]string, len(s))
+       for i, arg := range s {
+               a[i] = commute1(arg, cnt, arch)
+       }
+
+       // Choose all possibilities from all args.
+       r := crossProduct(a)
+
+       // If commutative, do that again with its two args reversed.
+       if commutative {
+               a[idx0], a[idx1] = a[idx1], a[idx0]
+               r = append(r, crossProduct(a)...)
+       }
+
+       // Construct result.
+       for i, x := range r {
+               r[i] = prefix + "(" + x + ")"
+       }
+       return r
+}
+
+// varCount returns a map which counts the number of occurrences of
+// Value variables in m.
+func varCount(m string) map[string]int {
+       cnt := map[string]int{}
+       varCount1(m, cnt)
+       return cnt
+}
+func varCount1(m string, cnt map[string]int) {
+       if m[0] == '<' || m[0] == '[' || m[0] == '{' {
+               return
+       }
+       if isVariable(m) {
+               cnt[m]++
+               return
+       }
+       // Split up input.
+       colon := strings.Index(m, ":")
+       if colon >= 0 && isVariable(m[:colon]) {
+               cnt[m[:colon]]++
+               m = m[colon+1:]
+       }
+       if m[0] != '(' || m[len(m)-1] != ')' {
+               panic("non-compound expr in varCount1: " + m)
+       }
+       s := split(m[1 : len(m)-1])
+       for _, arg := range s[1:] {
+               varCount1(arg, cnt)
+       }
+}
+
+// crossProduct returns all possible values
+// x[0][i] + " " + x[1][j] + " " + ... + " " + x[len(x)-1][k]
+// for all valid values of i, j, ..., k.
+func crossProduct(x [][]string) []string {
+       if len(x) == 1 {
+               return x[0]
+       }
+       var r []string
+       for _, tail := range crossProduct(x[1:]) {
+               for _, first := range x[0] {
+                       r = append(r, first+" "+tail)
+               }
+       }
+       return r
+}
+
+// normalizeWhitespace replaces 2+ whitespace sequences with a single space.
+func normalizeWhitespace(x string) string {
+       x = strings.Join(strings.Fields(x), " ")
+       x = strings.Replace(x, "( ", "(", -1)
+       x = strings.Replace(x, " )", ")", -1)
+       return x
+}
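As an illustration of how the generator combines per-argument alternatives, here is a standalone sketch that reuses the crossProduct logic from the hunk above on made-up inputs (one argument slot with its two commuted forms, one slot with a single form):

package main

import "fmt"

func crossProduct(x [][]string) []string {
	if len(x) == 1 {
		return x[0]
	}
	var r []string
	for _, tail := range crossProduct(x[1:]) {
		for _, first := range x[0] {
			r = append(r, first+" "+tail)
		}
	}
	return r
}

func main() {
	args := [][]string{
		{"(ADD x y)", "(ADD y x)"},
		{"(MOVDconst [c])"},
	}
	for _, v := range crossProduct(args) {
		fmt.Println(v)
	}
	// Output:
	// (ADD x y) (MOVDconst [c])
	// (ADD y x) (MOVDconst [c])
}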
index ce6988e0142afb12e63190371ff60988d2381550..81091ee2fa38c615e6a465514f53003179920c02 100644 (file)
@@ -2475,6 +2475,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULL",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULL,
                reg: regInfo{
@@ -2491,6 +2492,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULLU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -2507,6 +2509,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULLQU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -2854,9 +2857,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTL",
-               argLen: 2,
-               asm:    x86.ATESTL,
+               name:        "TESTL",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -2865,9 +2869,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTW",
-               argLen: 2,
-               asm:    x86.ATESTW,
+               name:        "TESTW",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -2876,9 +2881,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTB",
-               argLen: 2,
-               asm:    x86.ATESTB,
+               name:        "TESTB",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -3658,10 +3664,11 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "LEAL1",
-               auxType:   auxSymOff,
-               argLen:    2,
-               symEffect: SymAddr,
+               name:        "LEAL1",
+               auxType:     auxSymOff,
+               argLen:      2,
+               commutative: true,
+               symEffect:   SymAddr,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3840,11 +3847,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVBLZX,
+               name:        "MOVBloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3856,11 +3864,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVWLZX,
+               name:        "MOVWloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVWLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3888,11 +3897,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVL,
+               name:        "MOVLloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3920,11 +3930,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVB,
+               name:        "MOVBstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3934,11 +3945,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVW,
+               name:        "MOVWstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3962,11 +3974,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVL,
+               name:        "MOVLstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -4949,6 +4962,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULQ",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULQ,
                reg: regInfo{
@@ -4965,6 +4979,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULL",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULL,
                reg: regInfo{
@@ -4981,6 +4996,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULQU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULQ,
                reg: regInfo{
@@ -4997,6 +5013,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULLU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -5125,6 +5142,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULQU2",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULQ,
                reg: regInfo{
@@ -5508,9 +5526,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTQ",
-               argLen: 2,
-               asm:    x86.ATESTQ,
+               name:        "TESTQ",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5519,9 +5538,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTL",
-               argLen: 2,
-               asm:    x86.ATESTL,
+               name:        "TESTL",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5530,9 +5550,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTW",
-               argLen: 2,
-               asm:    x86.ATESTW,
+               name:        "TESTW",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5541,9 +5562,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTB",
-               argLen: 2,
-               asm:    x86.ATESTB,
+               name:        "TESTB",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5629,7 +5651,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHLQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5645,7 +5667,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHLLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5725,7 +5747,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5741,7 +5763,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5757,7 +5779,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5853,7 +5875,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5869,7 +5891,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5885,7 +5907,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5917,7 +5939,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5933,7 +5955,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5949,7 +5971,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -6807,10 +6829,11 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "LEAQ1",
-               auxType:   auxSymOff,
-               argLen:    2,
-               symEffect: SymAddr,
+               name:        "LEAQ1",
+               auxType:     auxSymOff,
+               argLen:      2,
+               commutative: true,
+               symEffect:   SymAddr,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7081,11 +7104,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVBLZX,
+               name:        "MOVBloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7097,11 +7121,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVWLZX,
+               name:        "MOVWloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVWLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7129,11 +7154,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVL,
+               name:        "MOVLloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7161,11 +7187,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVQloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVQ,
+               name:        "MOVQloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7193,11 +7220,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVB,
+               name:        "MOVBstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7207,11 +7235,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVW,
+               name:        "MOVWstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7235,11 +7264,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVL,
+               name:        "MOVLstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7263,11 +7293,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVQstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVQ,
+               name:        "MOVQstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -16534,9 +16565,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "NOR",
-               argLen: 2,
-               asm:    ppc64.ANOR,
+               name:        "NOR",
+               argLen:      2,
+               commutative: true,
+               asm:         ppc64.ANOR,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
@@ -18282,6 +18314,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULHD",
                argLen:       2,
+               commutative:  true,
                resultInArg0: true,
                clobberFlags: true,
                asm:          s390x.AMULHD,
@@ -18298,6 +18331,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULHDU",
                argLen:       2,
+               commutative:  true,
                resultInArg0: true,
                clobberFlags: true,
                asm:          s390x.AMULHDU,
@@ -18885,7 +18919,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SLDconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASLD,
                reg: regInfo{
@@ -18899,7 +18933,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SLWconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASLW,
                reg: regInfo{
@@ -18941,7 +18975,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SRDconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASRD,
                reg: regInfo{
@@ -18955,7 +18989,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SRWconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASRW,
                reg: regInfo{
@@ -18999,7 +19033,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SRADconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                clobberFlags: true,
                asm:          s390x.ASRAD,
@@ -19014,7 +19048,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SRAWconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                clobberFlags: true,
                asm:          s390x.ASRAW,
@@ -19029,7 +19063,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "RLLGconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ARLLG,
                reg: regInfo{
@@ -19043,7 +19077,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "RLLconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ARLL,
                reg: regInfo{
@@ -19863,6 +19897,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVBZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVBZ,
@@ -19880,6 +19915,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVHZ,
@@ -19897,6 +19933,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVWZ,
@@ -19914,6 +19951,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVD,
@@ -19931,6 +19969,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVHBR,
@@ -19948,6 +19987,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVWBR,
@@ -19965,6 +20005,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVDBR,
@@ -19982,6 +20023,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVBstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVB,
@@ -19997,6 +20039,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVH,
@@ -20012,6 +20055,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVW,
@@ -20027,6 +20071,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVD,
@@ -20042,6 +20087,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVHBR,
@@ -20057,6 +20103,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVWBR,
@@ -20072,6 +20119,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVDBR,
@@ -20646,14 +20694,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Add32F",
-               argLen:  2,
-               generic: true,
+               name:        "Add32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Add64F",
-               argLen:  2,
-               generic: true,
+               name:        "Add64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Sub8",
@@ -20715,14 +20765,16 @@ var opcodeTable = [...]opInfo{
                generic:     true,
        },
        {
-               name:    "Mul32F",
-               argLen:  2,
-               generic: true,
+               name:        "Mul32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul64F",
-               argLen:  2,
-               generic: true,
+               name:        "Mul64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Div32F",
@@ -20735,34 +20787,40 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Hmul32",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul32",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul32u",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul32u",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul64",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul64",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul64u",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul64u",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul32uhilo",
-               argLen:  2,
-               generic: true,
+               name:        "Mul32uhilo",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul64uhilo",
-               argLen:  2,
-               generic: true,
+               name:        "Mul64uhilo",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Avg32u",
@@ -21212,14 +21270,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Eq32F",
-               argLen:  2,
-               generic: true,
+               name:        "Eq32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Eq64F",
-               argLen:  2,
-               generic: true,
+               name:        "Eq64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:        "Neq8",
@@ -21262,14 +21322,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Neq32F",
-               argLen:  2,
-               generic: true,
+               name:        "Neq32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Neq64F",
-               argLen:  2,
-               generic: true,
+               name:        "Neq64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Less8",
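
The rewrite386.go hunks below show the matchers the generator now emits for both argument orders of a commutative op. As an illustrative sketch (the .rules source text is not reproduced here), a single source rule along the lines of

(ADDL x (SHLLconst [3] y)) -> (LEAL8 x y)

also yields the mirrored matcher visible in the generated code, roughly

(ADDL (SHLLconst [3] y) x) -> (LEAL8 x y)
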
index ba5288de2a13da35530c6c7cb788d94ea4560c64..7ac78aa8bf60034efe7a89a32467baf057ebc641 100644 (file)
@@ -620,6 +620,40 @@ func rewriteValue386_Op386ADCL(v *Value) bool {
                v.AddArg(f)
                return true
        }
+       // match: (ADCL (MOVLconst [c]) x f)
+       // cond:
+       // result: (ADCLconst [c] x f)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               f := v.Args[2]
+               v.reset(Op386ADCLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(f)
+               return true
+       }
+       // match: (ADCL x (MOVLconst [c]) f)
+       // cond:
+       // result: (ADCLconst [c] x f)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               f := v.Args[2]
+               v.reset(Op386ADCLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(f)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386ADDL(v *Value) bool {
@@ -653,9 +687,9 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
+       // match: (ADDL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHLLconst {
@@ -667,10 +701,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
@@ -678,34 +713,35 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (ADDL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -718,13 +754,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
@@ -732,38 +766,36 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ADDL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -776,13 +808,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
@@ -790,32 +820,30 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (ADDL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -837,6 +865,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [3] y) x)
+       // cond:
+       // result: (LEAL8 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (SHLLconst [2] y))
        // cond:
        // result: (LEAL4 x y)
@@ -855,6 +901,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [2] y) x)
+       // cond:
+       // result: (LEAL4 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (SHLLconst [1] y))
        // cond:
        // result: (LEAL2 x y)
@@ -873,6 +937,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [1] y) x)
+       // cond:
+       // result: (LEAL2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (ADDL y y))
        // cond:
        // result: (LEAL2 x y)
@@ -891,6 +973,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (ADDL y y) x)
+       // cond:
+       // result: (LEAL2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (ADDL x y))
        // cond:
        // result: (LEAL2 y x)
@@ -927,6 +1027,42 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADDL (ADDL x y) x)
+       // cond:
+       // result: (LEAL2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDL (ADDL y x) x)
+       // cond:
+       // result: (LEAL2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
        // match: (ADDL (ADDLconst [c] x) y)
        // cond:
        // result: (LEAL1 [c] x y)
@@ -944,17 +1080,17 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDL x (ADDLconst [c] y))
+       // match: (ADDL y (ADDLconst [c] x))
        // cond:
        // result: (LEAL1 [c] x y)
        for {
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
+               x := v_1.Args[0]
                v.reset(Op386LEAL1)
                v.AuxInt = c
                v.AddArg(x)
@@ -983,7 +1119,7 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDL (LEAL [c] {s} x) y)
+       // match: (ADDL (LEAL [c] {s} y) x)
        // cond: x.Op != OpSB && y.Op != OpSB
        // result: (LEAL1 [c] {s} x y)
        for {
@@ -993,8 +1129,8 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
@@ -1020,6 +1156,21 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (NEGL y) x)
+       // cond:
+       // result: (SUBL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386NEGL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386SUBL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386ADDLcarry(v *Value) bool {
@@ -2096,20 +2247,20 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} x (ADDLconst [d] y))
-       // cond: is32Bit(c+d)   && y.Op != OpSB
+       // match: (LEAL1 [c] {s} y (ADDLconst [d] x))
+       // cond: is32Bit(c+d)   && x.Op != OpSB
        // result: (LEAL1 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+d) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
                v.reset(Op386LEAL1)
@@ -2141,9 +2292,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [1] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [1] y) x)
        // cond:
-       // result: (LEAL2 [c] {s} y x)
+       // result: (LEAL2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2154,13 +2305,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 1 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL2)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [c] {s} x (SHLLconst [2] y))
@@ -2185,9 +2336,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [2] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [2] y) x)
        // cond:
-       // result: (LEAL4 [c] {s} y x)
+       // result: (LEAL4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2198,13 +2349,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 2 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL4)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [c] {s} x (SHLLconst [3] y))
@@ -2229,9 +2380,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [3] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [3] y) x)
        // cond:
-       // result: (LEAL8 [c] {s} y x)
+       // result: (LEAL8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2242,13 +2393,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 3 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL8)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y)
@@ -2275,21 +2426,21 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [off1] {sym1} x (LEAL [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // match: (LEAL1 [off1] {sym1} y (LEAL [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
        // result: (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386LEAL {
                        break
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
                v.reset(Op386LEAL1)
@@ -2765,7 +2916,7 @@ func rewriteValue386_Op386MOVBload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -2788,7 +2939,7 @@ func rewriteValue386_Op386MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -2887,6 +3038,28 @@ func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
        // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
@@ -2909,6 +3082,28 @@ func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386MOVBstore(v *Value) bool {
@@ -2958,7 +3153,7 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -3006,7 +3201,7 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -3415,19 +3610,19 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVBstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
        // cond:
        // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                d := v_1.AuxInt
-               idx := v_1.Args[0]
+               ptr := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
                v.reset(Op386MOVBstoreidx1)
@@ -3439,13 +3634,61 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != Op386SHRLconst {
@@ -3487,9 +3730,9 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -3499,7 +3742,9 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                if v_2.Op != Op386SHRLconst {
                        break
                }
-               j := v_2.AuxInt
+               if v_2.AuxInt != 8 {
+                       break
+               }
                w := v_2.Args[0]
                x := v.Args[3]
                if x.Op != Op386MOVBstoreidx1 {
@@ -3511,20 +3756,13 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               if idx != x.Args[0] {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if p != x.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
@@ -3536,555 +3774,452 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLstore {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_2.AuxInt != 8 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x.AuxInt != i-1 {
                        break
                }
-               v.reset(Op386MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if x.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if p != x.Args[0] {
                        break
                }
-               v.reset(Op386MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               if idx != x.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x.Args[2] {
                        break
                }
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if v_2.AuxInt != 8 {
                        break
                }
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if x.AuxInt != i-1 {
                        break
                }
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
-                       break
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
                }
-               if v_1.AuxInt != 2 {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if x.AuxInt != i-1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if x.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
-       // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if p != x.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if x.AuxInt != i-1 {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if x.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if idx != x.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if p != x.Args[1] {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w0.AuxInt != j-8 {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
+func rewriteValue386_Op386MOVLload(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
        for {
-               sc := v.AuxInt
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
+               v.reset(Op386MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               sc := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(Op386MOVLload)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL1 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL4 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(Op386MOVLloadidx4)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
+       // match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDL {
@@ -4093,8 +4228,11 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = x
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -4103,10 +4241,10 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
-       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
+func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
+       // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
        // cond:
-       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4120,7 +4258,7 @@ func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -4128,129 +4266,99 @@ func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // match: (MOVLloadidx1 [c] {sym} (SHLLconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if v_0.Op != Op386SHLLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(Op386MOVLloadidx4)
+               v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
-       // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
        // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
+       // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               if v_1.Op != Op386ADDLconst {
                        break
                }
+               d := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
-               v.AuxInt = c
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
        // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4259,49 +4367,23 @@ func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
                        break
                }
                d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx1)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
-       // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
+       // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4312,20 +4394,18 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
                d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4336,50 +4416,25 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
                }
                d := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c + 4*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVSDconst [c])
-       // cond: config.ctxt.Flag_shared
-       // result: (MOVSDconst2 (MOVSDconst1 [c]))
-       for {
-               c := v.AuxInt
-               if !(config.ctxt.Flag_shared) {
-                       break
-               }
-               v.reset(Op386MOVSDconst2)
-               v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, types.UInt32)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDload(v *Value) bool {
+func rewriteValue386_Op386MOVLstore(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -4389,20 +4444,45 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSDload)
+               v.reset(Op386MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4413,20 +4493,22 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSDload)
+               v.reset(Op386MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4438,47 +4520,51 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDloadidx1)
+               v.reset(Op386MOVLstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL8 {
+               if v_0.Op != Op386LEAL4 {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDloadidx8)
+               v.reset(Op386MOVLstoreidx4)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
+       // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -4488,231 +4574,131 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                }
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSDloadidx1)
+               v.reset(Op386MOVLstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
-       // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
+               sc := v.AuxInt
+               s := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               off := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
-       // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(Op386MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
+               sc := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSDstore)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               off1 := v.AuxInt
+               x := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL1 {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               off1 := v.AuxInt
+               x := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL8 {
+               if v_0.Op != Op386LEAL4 {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
        for {
-               off := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDL {
@@ -4720,103 +4706,241 @@ func rewriteValue386_Op386MOVSDstore(v *Value) bool {
                }
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = off
+               mem := v.Args[1]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = x
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
-       // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
+       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               c := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               d := v_1.AuxInt
+               c := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
-       // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
+       // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
        // cond:
-       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               c := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
+       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (SHLLconst [2] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
@@ -4827,8 +4951,32 @@ func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = c + 8*d
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -4838,37 +4986,88 @@ func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSconst(v *Value) bool {
+func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
+       // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSDconst(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (MOVSSconst [c])
+       // match: (MOVSDconst [c])
        // cond: config.ctxt.Flag_shared
-       // result: (MOVSSconst2 (MOVSSconst1 [c]))
+       // result: (MOVSDconst2 (MOVSDconst1 [c]))
        for {
                c := v.AuxInt
                if !(config.ctxt.Flag_shared) {
                        break
                }
-               v.reset(Op386MOVSSconst2)
-               v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, types.UInt32)
+               v.reset(Op386MOVSDconst2)
+               v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, types.UInt32)
                v0.AuxInt = c
                v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSSload(v *Value) bool {
+func rewriteValue386_Op386MOVSDload(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -4882,16 +5081,16 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSSload)
+               v.reset(Op386MOVSDload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4906,16 +5105,16 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSSload)
+               v.reset(Op386MOVSDload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4931,7 +5130,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -4939,14 +5138,14 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               if v_0.Op != Op386LEAL8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -4957,7 +5156,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSloadidx4)
+               v.reset(Op386MOVSDloadidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -4965,9 +5164,9 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
+       // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -4981,7 +5180,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -4991,10 +5190,10 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
-       // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
+       // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5006,7 +5205,7 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5014,9 +5213,9 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5028,7 +5227,7 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5038,10 +5237,10 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
-       // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
+func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
+       // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5053,7 +5252,7 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx4)
+               v.reset(Op386MOVSDloadidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5061,9 +5260,9 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5075,8 +5274,8 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx4)
-               v.AuxInt = c + 4*d
+               v.reset(Op386MOVSDloadidx8)
+               v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -5085,14 +5284,14 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstore(v *Value) bool {
+func rewriteValue386_Op386MOVSDstore(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -5107,7 +5306,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSSstore)
+               v.reset(Op386MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -5115,9 +5314,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -5133,7 +5332,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSSstore)
+               v.reset(Op386MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -5141,9 +5340,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -5160,7 +5359,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -5169,14 +5368,14 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               if v_0.Op != Op386LEAL8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -5188,7 +5387,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx4)
+               v.reset(Op386MOVSDstoreidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -5197,9 +5396,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
+       // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -5214,7 +5413,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -5225,10 +5424,10 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
-       // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
+       // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
        // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5241,7 +5440,7 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                idx := v.Args[1]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5250,9 +5449,9 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
        // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5265,7 +5464,7 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5276,10 +5475,10 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
-       // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
+       // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
        // cond:
-       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5292,7 +5491,7 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                idx := v.Args[1]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx4)
+               v.reset(Op386MOVSDstoreidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5301,9 +5500,9 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
        // cond:
-       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5312,2199 +5511,6145 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLSX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWload {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSDstoreidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVSSconst [c])
+       // cond: config.ctxt.Flag_shared
+       // result: (MOVSSconst2 (MOVSSconst1 [c]))
+       for {
+               c := v.AuxInt
+               if !(config.ctxt.Flag_shared) {
+                       break
+               }
+               v.reset(Op386MOVSSconst2)
+               v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, types.UInt32)
+               v0.AuxInt = c
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
+       // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
+       // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstore(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
+       // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
+       // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLSX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWLSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
+                       break
+               }
+               v.reset(Op386ANDLconst)
+               v.AuxInt = c & 0x7fff
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLSXload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWLSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLZX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWloadidx2 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c & 0xffff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(Op386ANDLconst)
+               v.AuxInt = c & 0xffff
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
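+// Note on the (MOVWLZX (ANDLconst [c] x)) case above: zero-extending a
+// 16-bit value is the same as masking with 0xffff, so it composes with an
+// existing ANDLconst by simply narrowing the mask to c & 0xffff.
+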
+func rewriteValue386_Op386MOVWload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
+       // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (SHLLconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
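+// The cases above come in pairs that differ only in which operand carries
+// the ADDLconst or SHLLconst; the two address operands of an idx1 load are
+// added symmetrically, so both orders rewrite to the same result. Roughly,
+// each pair corresponds to one source-level rule such as the following
+// (reconstructed from the match/result comments, not quoted from the .rules
+// file):
+//
+//	(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
+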
+func rewriteValue386_Op386MOVWloadidx2(v *Value) bool {
+       // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
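+// Note on the second case above: the index of an idx2 load is scaled by 2,
+// so folding (ADDLconst [d] idx) into the displacement adds 2*d rather than
+// d. For example (hypothetical values), with c=4 and d=3 the address
+// ptr + 4 + 2*(idx+3) equals ptr + 10 + 2*idx, i.e. the new AuxInt is
+// c + 2*d = 10.
+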
+func rewriteValue386_Op386MOVWstore(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWLSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWLZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != Op386MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != Op386MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
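+// The last two cases above coalesce a pair of adjacent 16-bit stores into a
+// single 32-bit store: with the low half of w stored at [i-2] and w>>16
+// stored at [i], one MOVLstore of w (or of w0 = w>>(j-16) in the general
+// case) at [i-2] writes the same four bytes on little-endian x86, provided
+// the older store has no other uses (x.Uses == 1) and can be clobbered.
+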
+func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != Op386MOVWstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
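+// In the final case above, two adjacent 2-byte constant stores are merged
+// into one 4-byte constant store. For example (hypothetical values): if the
+// store at offset Off(a) writes 0x1234 and the store at Off(a)+2 writes
+// 0xabcd, the merged constant is 0x1234 | 0xabcd<<16 = 0xabcd1234, stored at
+// Off(a), which produces the same bytes on little-endian x86.
+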
+func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool {
+       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != Op386MOVWstoreconstidx1 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(2 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != Op386MOVWstoreconstidx2 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type)
+               v0.AuxInt = 1
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
+       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (SHLLconst [1] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
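The MOVWstoreidx2 cases above merge a pair of adjacent half-word stores into one word store: the low 16 bits of w go to offset i-2 and the high 16 bits (the SHRLconst [16]) to offset i, which on a little-endian target is exactly a 32-bit store of w at i-2. Because the replacement MOVLstoreidx1 scales its index by 1 rather than 2, the index is re-expressed as idx<<1 (the SHLLconst [1]), using p + 2*idx == p + (idx<<1). A minimal stand-alone sketch of that equivalence; the helper names below are invented for illustration and are not compiler identifiers:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// storeTwoHalves mirrors the matched pattern: two half-word stores of the
// halves of w at offsets off and off+2.
func storeTwoHalves(mem []byte, off int, w uint32) {
	binary.LittleEndian.PutUint16(mem[off:], uint16(w))
	binary.LittleEndian.PutUint16(mem[off+2:], uint16(w>>16))
}

// storeWord mirrors the rewrite result: a single 32-bit store of w at off.
func storeWord(mem []byte, off int, w uint32) {
	binary.LittleEndian.PutUint32(mem[off:], w)
}

func main() {
	a, b := make([]byte, 8), make([]byte, 8)
	storeTwoHalves(a, 2, 0xdeadbeef)
	storeWord(b, 2, 0xdeadbeef)
	fmt.Println(bytes.Equal(a, b)) // true: the two forms write the same bytes
}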
+func rewriteValue386_Op386MULL(v *Value) bool {
+       // match: (MULL x (MOVLconst [c]))
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(Op386MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULL (MOVLconst [c]) x)
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(Op386MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MULLconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLconst [c] (MULLconst [d] x))
+       // cond:
+       // result: (MULLconst [int64(int32(c * d))] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MULLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(Op386MULLconst)
+               v.AuxInt = int64(int32(c * d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [-1] x)
+       // cond:
+       // result: (NEGL x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386NEGL)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [0] _)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(Op386MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [3] x)
+       // cond:
+       // result: (LEAL2 x x)
+       for {
+               if v.AuxInt != 3 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [5] x)
+       // cond:
+       // result: (LEAL4 x x)
+       for {
+               if v.AuxInt != 5 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [7] x)
+       // cond:
+       // result: (LEAL8 (NEGL <v.Type> x) x)
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v0 := b.NewValue0(v.Pos, Op386NEGL, v.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [9] x)
+       // cond:
+       // result: (LEAL8 x x)
+       for {
+               if v.AuxInt != 9 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [11] x)
+       // cond:
+       // result: (LEAL2 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 11 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [13] x)
+       // cond:
+       // result: (LEAL4 x (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [21] x)
+       // cond:
+       // result: (LEAL4 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [25] x)
+       // cond:
+       // result: (LEAL8 x (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != 25 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [37] x)
+       // cond:
+       // result: (LEAL4 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [41] x)
+       // cond:
+       // result: (LEAL8 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [73] x)
+       // cond:
+       // result: (LEAL8 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SHLLconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(Op386SUBL)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(Op386LEAL1)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
+                       break
+               }
+               v.reset(Op386LEAL4)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
+                       break
+               }
+               v.reset(Op386LEAL8)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [int64(int32(c*d))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
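The long run of MULLconst cases above is multiply strength reduction: a multiply by a constant of convenient shape is rewritten into LEA and shift instructions, where LEAL2/LEAL4/LEAL8 a b compute a+2*b, a+4*b and a+8*b. The identities behind the specific constants (3, 5, 7, 9, 11, ..., 73, powers of two, and constants one/two/four/eight away from a power of two) can be checked directly. A small sketch under that reading of the LEA operands, with made-up helper names:

package main

import "fmt"

// lea2, lea4 and lea8 model what 386 LEAL2/LEAL4/LEAL8 compute: a + scale*b.
func lea2(a, b int32) int32 { return a + 2*b }
func lea4(a, b int32) int32 { return a + 4*b }
func lea8(a, b int32) int32 { return a + 8*b }

func main() {
	x := int32(12345)
	fmt.Println(lea2(x, x) == 3*x)           // (MULLconst [3] x) -> (LEAL2 x x)
	fmt.Println(lea8(-x, x) == 7*x)          // (MULLconst [7] x) -> (LEAL8 (NEGL x) x)
	fmt.Println(lea2(x, lea4(x, x)) == 11*x) // (MULLconst [11] x) -> (LEAL2 x (LEAL4 x x))
	fmt.Println(lea8(x, lea8(x, x)) == 73*x) // (MULLconst [73] x) -> (LEAL8 x (LEAL8 x x))
	fmt.Println(x<<5-x == 31*x)              // isPowerOfTwo(c+1): (SUBL (SHLLconst [5] x) x)
	fmt.Println(lea8(x<<7, x) == 136*x)      // isPowerOfTwo(c-8): (LEAL8 (SHLLconst [7] x) x)
}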
+func rewriteValue386_Op386NEGL(v *Value) bool {
+       // match: (NEGL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386NOTL(v *Value) bool {
+       // match: (NOTL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
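The NEGL and NOTL cases are plain constant folding. The int64(int32(...)) conversion matters because AuxInt is a 64-bit field holding a sign-extended 32-bit constant, so the negated value is truncated back to 32 bits before being stored, while NOT is just the bitwise complement. A tiny sketch of the arithmetic (plain Go, not compiler code):

package main

import (
	"fmt"
	"math"
)

func main() {
	c := int64(math.MinInt32)
	fmt.Println(int64(int32(-c))) // -2147483648: negating MinInt32 wraps back to MinInt32
	fmt.Println(^c)               // 2147483647: NOTL folds to the bitwise complement
}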
+func rewriteValue386_Op386ORL(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORL x (MOVLconst [c]))
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(Op386ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (MOVLconst [c]) x)
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(Op386ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(Op386ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(Op386ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
+                       break
+               }
+               v.reset(Op386ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
+                       break
+               }
+               v.reset(Op386ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRBconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
+                       break
+               }
+               v.reset(Op386ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
+                       break
+               }
+               v.reset(Op386ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)) x0:(MOVWload [i0] {s} p mem)) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)) o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               mem := x2.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)) x0:(MOVWload [i0] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               mem := x2.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
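The load-combining ORL cases above recognize adjacent little-endian loads combined with shifts and replace them with one wider load: for two bytes, uint32(mem[i0]) | uint32(mem[i0+1])<<8 is exactly the 16-bit load at i0, and ORing in a third and fourth byte shifted by 16 and 24 gives the 32-bit load at i0, which is why the results are MOVWload [i0] and MOVLload [i0]. The cases that follow are the same patterns with indexed (loadidx1) addressing, repeated for the possible operand orders. A compact sketch of the byte arithmetic, assuming little-endian memory and invented helper names:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	mem := []byte{0x11, 0x22, 0x33, 0x44}
	b := func(i int) uint32 { return uint32(mem[i]) }
	w16 := b(0) | b(1)<<8
	w32 := b(0) | b(1)<<8 | b(2)<<16 | b(3)<<24
	fmt.Println(w16 == uint32(binary.LittleEndian.Uint16(mem))) // MOVWload equivalence
	fmt.Println(w32 == binary.LittleEndian.Uint32(mem))         // MOVLload equivalence
}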
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x2.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWLSXload, v.Type)
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ANDLconst {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386ANDLconst)
-               v.AuxInt = c & 0x7fff
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLSXload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if s0.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWLSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLZX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWload {
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWloadidx1 {
+               if p != x0.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWloadidx2 {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c & 0xffff] x)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ANDLconst {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(Op386ANDLconst)
-               v.AuxInt = c & 0xffff
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWstore {
+               if mem != x0.Args[2] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if idx != x2.Args[0] {
                        break
                }
-               v.reset(Op386MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               if p != x2.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x2.Args[2] {
                        break
                }
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if s0.AuxInt != 16 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
-       // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWloadidx2(v *Value) bool {
-       // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWLSX {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWLZX {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if idx != x2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if p != x2.Args[1] {
                        break
                }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if mem != x2.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if p != x0.Args[0] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               if mem != x0.Args[2] {
                        break
                }
-               if v_1.AuxInt != 16 {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstore {
+               if s0.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if w != x.Args[1] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               if p != x0.Args[0] {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstore {
+               if idx != x0.Args[1] {
                        break
                }
-               if x.AuxInt != i-2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x.Aux != s {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if mem != x1.Args[2] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if idx != x0.Args[0] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != Op386MOVWstoreconst {
+               if p != x0.Args[1] {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x.Args[0] {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               if s0.AuxInt != 16 {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool {
-       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if p != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstoreconstidx1 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if s1.AuxInt != 24 {
                        break
                }
-               if p != x.Args[0] {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if i != x.Args[1] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(2 * c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstoreconstidx2 {
+               if p != x0.Args[1] {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x.Args[0] {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if i != x.Args[1] {
+               if s0.AuxInt != 16 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type)
-               v0.AuxInt = 1
-               v0.AddArg(i)
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(mem)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
-       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if s1.AuxInt != 24 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
                        break
                }
-               if v_2.AuxInt != 16 {
+               if mem != x1.Args[2] {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx1 {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               if x.AuxInt != i-2 {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if x.Aux != s {
+               if s1.AuxInt != 24 {
                        break
                }
-               if p != x.Args[0] {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if w != x.Args[2] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if p != x0.Args[0] {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx1 {
+               if idx != x0.Args[1] {
                        break
                }
-               if x.AuxInt != i-2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x.Aux != s {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               if idx != x.Args[1] {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w0.AuxInt != j-16 {
+               if idx != x1.Args[0] {
                        break
                }
-               if w != w0.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               if v_2.AuxInt != 16 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx2 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx2 {
+               if s1.AuxInt != 24 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if p != x.Args[0] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-16 {
+               if p != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
-               v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MULL(v *Value) bool {
-       // match: (MULL x (MOVLconst [c]))
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULL (MOVLconst [c]) x)
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(Op386MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MULLconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLconst [c] (MULLconst [d] x))
-       // cond:
-       // result: (MULLconst [int64(int32(c * d))] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MULLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(Op386MULLconst)
-               v.AuxInt = int64(int32(c * d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [-1] x)
-       // cond:
-       // result: (NEGL x)
-       for {
-               if v.AuxInt != -1 {
+               if p != x1.Args[1] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386NEGL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [0] _)
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               if v.AuxInt != 0 {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386MOVLconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               x := v.Args[0]
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULLconst [3] x)
-       // cond:
-       // result: (LEAL2 x x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               if v.AuxInt != 3 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL2)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [5] x)
-       // cond:
-       // result: (LEAL4 x x)
-       for {
-               if v.AuxInt != 5 {
+               if s1.AuxInt != 24 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [7] x)
-       // cond:
-       // result: (LEAL8 (NEGL <v.Type> x) x)
-       for {
-               if v.AuxInt != 7 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v0 := b.NewValue0(v.Pos, Op386NEGL, v.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [9] x)
-       // cond:
-       // result: (LEAL8 x x)
-       for {
-               if v.AuxInt != 9 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [11] x)
-       // cond:
-       // result: (LEAL2 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 11 {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL2)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [13] x)
-       // cond:
-       // result: (LEAL4 x (LEAL2 <v.Type> x x))
-       for {
-               if v.AuxInt != 13 {
+               if s0.AuxInt != 16 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [21] x)
-       // cond:
-       // result: (LEAL4 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 21 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [25] x)
-       // cond:
-       // result: (LEAL8 x (LEAL2 <v.Type> x x))
-       for {
-               if v.AuxInt != 25 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [37] x)
-       // cond:
-       // result: (LEAL4 x (LEAL8 <v.Type> x x))
-       for {
-               if v.AuxInt != 37 {
+               if p != x1.Args[0] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [41] x)
-       // cond:
-       // result: (LEAL8 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 41 {
+               if idx != x1.Args[1] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [73] x)
-       // cond:
-       // result: (LEAL8 x (LEAL8 <v.Type> x x))
-       for {
-               if v.AuxInt != 73 {
+               if mem != x1.Args[2] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SHLLconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386SUBL)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(Op386LEAL1)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-2) && c >= 34
-       // result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-2) && c >= 34) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386LEAL2)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 2)
-               v0.AddArg(x)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(x)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-4) && c >= 68
-       // result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-4) && c >= 68) {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386LEAL4)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 4)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-8) && c >= 136
-       // result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-8) && c >= 136) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386LEAL8)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [int64(int32(c*d))])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               if s0.AuxInt != 16 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = int64(int32(c * d))
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386NEGL(v *Value) bool {
-       // match: (NEGL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [int64(int32(-c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = int64(int32(-c))
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386NOTL(v *Value) bool {
-       // match: (NOTL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386ORL(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORL x (MOVLconst [c]))
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if p != x1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               if idx != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(Op386ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(Op386ROLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: ( ORL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               if p != x1.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               if p != x0.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(Op386ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: ( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRWconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x != v_1.Args[0] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               if s0.AuxInt != 16 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRBconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(Op386ROLBconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRBconst {
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL x:(SHLLconst _) y)
-       // cond: y.Op != Op386SHLLconst
-       // result: (ORL y x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != Op386SHLLconst) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386ORL)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL                  x0:(MOVBload [i]   {s} p mem)     s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != Op386MOVBload {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != Op386SHLLconst {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if s0.AuxInt != 8 {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != Op386MOVBload {
+               if s0.AuxInt != 16 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWload [i]   {s} p mem)     s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))     s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != Op386ORL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != Op386MOVWload {
+               if s1.AuxInt != 24 {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
                if s0.Op != Op386SHLLconst {
                        break
                }
@@ -7512,187 +11657,205 @@ func rewriteValue386_Op386ORL(v *Value) bool {
                        break
                }
                x1 := s0.Args[0]
-               if x1.Op != Op386MOVBload {
-                       break
-               }
-               if x1.AuxInt != i+2 {
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
-                       break
-               }
-               s1 := v.Args[1]
-               if s1.Op != Op386SHLLconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if mem != x1.Args[2] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != Op386MOVBload {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x2.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x2.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x0 := v.Args[0]
-               if x0.Op != Op386MOVBloadidx1 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
                if s0.Op != Op386SHLLconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if s0.AuxInt != 16 {
                        break
                }
                x1 := s0.Args[0]
                if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))     s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != Op386ORL {
-                       break
-               }
-               x0 := o0.Args[0]
-               if x0.Op != Op386MOVWloadidx1 {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               if s1.AuxInt != 24 {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != Op386MOVBloadidx1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+2 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if x1.Aux != s {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x1.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               if idx != x1.Args[1] {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if mem != x1.Args[2] {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != Op386SHLLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if p != x1.Args[1] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != Op386MOVBloadidx1 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x2.AuxInt != i+3 {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x2.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
                v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(idx)
@@ -9180,9 +13343,9 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
+       // match: (XORL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHLLconst {
@@ -9194,10 +13357,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
@@ -9205,34 +13369,35 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (XORL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -9245,13 +13410,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
@@ -9259,38 +13422,36 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -9303,13 +13464,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
@@ -9317,32 +13476,30 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -9408,7 +13565,7 @@ func rewriteValue386_Op386XORLconst(v *Value) bool {
        return false
 }
 func rewriteValue386_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9421,7 +13578,7 @@ func rewriteValue386_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValue386_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9488,7 +13645,7 @@ func rewriteValue386_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9553,7 +13710,7 @@ func rewriteValue386_OpAnd32(v *Value) bool {
        }
 }
 func rewriteValue386_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDL x y)
        for {
@@ -9642,7 +13799,7 @@ func rewriteValue386_OpCom32(v *Value) bool {
        }
 }
 func rewriteValue386_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTL x)
        for {
@@ -9653,7 +13810,7 @@ func rewriteValue386_OpCom8(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9664,7 +13821,7 @@ func rewriteValue386_OpConst16(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9697,7 +13854,7 @@ func rewriteValue386_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9810,7 +13967,7 @@ func rewriteValue386_OpCvt64Fto32F(v *Value) bool {
        }
 }
 func rewriteValue386_OpDiv16(v *Value) bool {
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  x y)
        for {
@@ -9836,7 +13993,7 @@ func rewriteValue386_OpDiv16u(v *Value) bool {
        }
 }
 func rewriteValue386_OpDiv32(v *Value) bool {
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVL  x y)
        for {
@@ -9892,7 +14049,7 @@ func rewriteValue386_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (SignExt8to16 x) (SignExt8to16 y))
        for {
@@ -9913,7 +14070,7 @@ func rewriteValue386_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
        for {
@@ -9932,7 +14089,7 @@ func rewriteValue386_OpDiv8u(v *Value) bool {
 func rewriteValue386_OpEq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (SETEQ (CMPW x y))
        for {
@@ -9949,7 +14106,7 @@ func rewriteValue386_OpEq16(v *Value) bool {
 func rewriteValue386_OpEq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (SETEQ (CMPL x y))
        for {
@@ -10000,7 +14157,7 @@ func rewriteValue386_OpEq64F(v *Value) bool {
 func rewriteValue386_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -10017,7 +14174,7 @@ func rewriteValue386_OpEq8(v *Value) bool {
 func rewriteValue386_OpEqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -10051,7 +14208,7 @@ func rewriteValue386_OpEqPtr(v *Value) bool {
 func rewriteValue386_OpGeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (SETGE (CMPW x y))
        for {
@@ -10085,7 +14242,7 @@ func rewriteValue386_OpGeq16U(v *Value) bool {
 func rewriteValue386_OpGeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (SETGE (CMPL x y))
        for {
@@ -10153,7 +14310,7 @@ func rewriteValue386_OpGeq64F(v *Value) bool {
 func rewriteValue386_OpGeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (SETGE (CMPB x y))
        for {
@@ -10170,7 +14327,7 @@ func rewriteValue386_OpGeq8(v *Value) bool {
 func rewriteValue386_OpGeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (SETAE (CMPB x y))
        for {
@@ -10207,7 +14364,7 @@ func rewriteValue386_OpGetG(v *Value) bool {
 func rewriteValue386_OpGreater16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (SETG (CMPW x y))
        for {
@@ -10241,7 +14398,7 @@ func rewriteValue386_OpGreater16U(v *Value) bool {
 func rewriteValue386_OpGreater32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (SETG (CMPL x y))
        for {
@@ -10309,7 +14466,7 @@ func rewriteValue386_OpGreater64F(v *Value) bool {
 func rewriteValue386_OpGreater8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (SETG (CMPB x y))
        for {
@@ -10326,7 +14483,7 @@ func rewriteValue386_OpGreater8(v *Value) bool {
 func rewriteValue386_OpGreater8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (SETA (CMPB x y))
        for {
@@ -10341,7 +14498,7 @@ func rewriteValue386_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValue386_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (HMULL  x y)
        for {
@@ -10434,7 +14591,7 @@ func rewriteValue386_OpIsSliceInBounds(v *Value) bool {
 func rewriteValue386_OpLeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (SETLE (CMPW x y))
        for {
@@ -10468,7 +14625,7 @@ func rewriteValue386_OpLeq16U(v *Value) bool {
 func rewriteValue386_OpLeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (SETLE (CMPL x y))
        for {
@@ -10536,7 +14693,7 @@ func rewriteValue386_OpLeq64F(v *Value) bool {
 func rewriteValue386_OpLeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (SETLE (CMPB x y))
        for {
@@ -10553,7 +14710,7 @@ func rewriteValue386_OpLeq8(v *Value) bool {
 func rewriteValue386_OpLeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (SETBE (CMPB x y))
        for {
@@ -10570,7 +14727,7 @@ func rewriteValue386_OpLeq8U(v *Value) bool {
 func rewriteValue386_OpLess16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (SETL (CMPW x y))
        for {
@@ -10604,7 +14761,7 @@ func rewriteValue386_OpLess16U(v *Value) bool {
 func rewriteValue386_OpLess32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (SETL (CMPL x y))
        for {
@@ -10672,7 +14829,7 @@ func rewriteValue386_OpLess64F(v *Value) bool {
 func rewriteValue386_OpLess8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (SETL (CMPB x y))
        for {
@@ -10689,7 +14846,7 @@ func rewriteValue386_OpLess8(v *Value) bool {
 func rewriteValue386_OpLess8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (SETB (CMPB x y))
        for {
@@ -10869,7 +15026,7 @@ func rewriteValue386_OpLsh16x64(v *Value) bool {
 func rewriteValue386_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -10978,7 +15135,7 @@ func rewriteValue386_OpLsh32x64(v *Value) bool {
 func rewriteValue386_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -11087,7 +15244,7 @@ func rewriteValue386_OpLsh8x64(v *Value) bool {
 func rewriteValue386_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -11109,7 +15266,7 @@ func rewriteValue386_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValue386_OpMod16(v *Value) bool {
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (MODW  x y)
        for {
@@ -11135,7 +15292,7 @@ func rewriteValue386_OpMod16u(v *Value) bool {
        }
 }
 func rewriteValue386_OpMod32(v *Value) bool {
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (MODL  x y)
        for {
@@ -11165,7 +15322,7 @@ func rewriteValue386_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (MODW  (SignExt8to16 x) (SignExt8to16 y))
        for {
@@ -11186,7 +15343,7 @@ func rewriteValue386_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
        for {
@@ -11491,7 +15648,7 @@ func rewriteValue386_OpMove(v *Value) bool {
        return false
 }
 func rewriteValue386_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11504,7 +15661,7 @@ func rewriteValue386_OpMul16(v *Value) bool {
        }
 }
 func rewriteValue386_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11556,7 +15713,7 @@ func rewriteValue386_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11569,7 +15726,7 @@ func rewriteValue386_OpMul8(v *Value) bool {
        }
 }
 func rewriteValue386_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11580,7 +15737,7 @@ func rewriteValue386_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValue386_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11663,7 +15820,7 @@ func rewriteValue386_OpNeg64F(v *Value) bool {
        return false
 }
 func rewriteValue386_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11676,7 +15833,7 @@ func rewriteValue386_OpNeg8(v *Value) bool {
 func rewriteValue386_OpNeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (SETNE (CMPW x y))
        for {
@@ -11693,7 +15850,7 @@ func rewriteValue386_OpNeq16(v *Value) bool {
 func rewriteValue386_OpNeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (SETNE (CMPL x y))
        for {
@@ -11744,7 +15901,7 @@ func rewriteValue386_OpNeq64F(v *Value) bool {
 func rewriteValue386_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -11761,7 +15918,7 @@ func rewriteValue386_OpNeq8(v *Value) bool {
 func rewriteValue386_OpNeqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -11857,7 +16014,7 @@ func rewriteValue386_OpOr32(v *Value) bool {
        }
 }
 func rewriteValue386_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORL x y)
        for {
@@ -11994,7 +16151,7 @@ func rewriteValue386_OpRsh16Ux64(v *Value) bool {
 func rewriteValue386_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
@@ -12111,7 +16268,7 @@ func rewriteValue386_OpRsh16x64(v *Value) bool {
 func rewriteValue386_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
@@ -12223,7 +16380,7 @@ func rewriteValue386_OpRsh32Ux64(v *Value) bool {
 func rewriteValue386_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -12340,7 +16497,7 @@ func rewriteValue386_OpRsh32x64(v *Value) bool {
 func rewriteValue386_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
@@ -12452,7 +16609,7 @@ func rewriteValue386_OpRsh8Ux64(v *Value) bool {
 func rewriteValue386_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
@@ -12569,7 +16726,7 @@ func rewriteValue386_OpRsh8x64(v *Value) bool {
 func rewriteValue386_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
@@ -12605,7 +16762,7 @@ func rewriteValue386_OpSignExt16to32(v *Value) bool {
        }
 }
 func rewriteValue386_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBLSX x)
        for {
@@ -12616,7 +16773,7 @@ func rewriteValue386_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBLSX x)
        for {
@@ -12770,7 +16927,7 @@ func rewriteValue386_OpStore(v *Value) bool {
        return false
 }
 func rewriteValue386_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12783,7 +16940,7 @@ func rewriteValue386_OpSub16(v *Value) bool {
        }
 }
 func rewriteValue386_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12850,7 +17007,7 @@ func rewriteValue386_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12876,7 +17033,7 @@ func rewriteValue386_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValue386_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -12900,7 +17057,7 @@ func rewriteValue386_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -12938,7 +17095,7 @@ func rewriteValue386_OpXor32(v *Value) bool {
        }
 }
 func rewriteValue386_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORL x y)
        for {
@@ -13237,7 +17394,7 @@ func rewriteValue386_OpZeroExt16to32(v *Value) bool {
        }
 }
 func rewriteValue386_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBLZX x)
        for {
@@ -13248,7 +17405,7 @@ func rewriteValue386_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBLZX x)
        for {
@@ -13590,7 +17747,7 @@ func rewriteBlock386(b *Block) bool {
                        return true
                }
        case BlockIf:
-               // match: (If (SETL  cmp) yes no)
+               // match: (If (SETL cmp) yes no)
                // cond:
                // result: (LT  cmp yes no)
                for {
@@ -13624,7 +17781,7 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETG  cmp) yes no)
+               // match: (If (SETG cmp) yes no)
                // cond:
                // result: (GT  cmp yes no)
                for {
@@ -13692,7 +17849,7 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETB  cmp) yes no)
+               // match: (If (SETB cmp) yes no)
                // cond:
                // result: (ULT cmp yes no)
                for {
@@ -13726,343 +17883,707 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETA  cmp) yes no)
+               // match: (If (SETA cmp) yes no)
+               // cond:
+               // result: (UGT cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETA {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETAE cmp) yes no)
+               // cond:
+               // result: (UGE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETAE {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETGF cmp) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETGF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETGEF cmp) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETGEF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETEQF cmp) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETEQF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386EQF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETNEF cmp) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETNEF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If cond yes no)
+               // cond:
+               // result: (NE (TESTB cond cond) yes no)
+               for {
+                       v := b.Control
+                       _ = v
+                       cond := b.Control
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NE
+                       v0 := b.NewValue0(v.Pos, Op386TESTB, TypeFlags)
+                       v0.AddArg(cond)
+                       v0.AddArg(cond)
+                       b.SetControl(v0)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+       case Block386LE:
+               // match: (LE (InvertFlags cmp) yes no)
+               // cond:
+               // result: (GE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386InvertFlags {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386GE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagEQ) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagEQ {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_ULT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagLT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_UGT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagLT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagGT_ULT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagGT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+               // match: (LE (FlagGT_UGT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagGT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+       case Block386LT:
+               // match: (LT (InvertFlags cmp) yes no)
                // cond:
-               // result: (UGT cmp yes no)
+               // result: (GT cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETA {
+                       if v.Op != Op386InvertFlags {
                                break
                        }
                        cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
+                       b.Kind = Block386GT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETAE cmp) yes no)
+               // match: (LT (FlagEQ) yes no)
                // cond:
-               // result: (UGE cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETAE {
+                       if v.Op != Op386FlagEQ {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If (SETGF  cmp) yes no)
+               // match: (LT (FlagLT_ULT) yes no)
                // cond:
-               // result: (UGT  cmp yes no)
+               // result: (First nil yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETGF {
+                       if v.Op != Op386FlagLT_ULT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
-                       b.SetControl(cmp)
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETGEF cmp) yes no)
+               // match: (LT (FlagLT_UGT) yes no)
                // cond:
-               // result: (UGE  cmp yes no)
+               // result: (First nil yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETGEF {
+                       if v.Op != Op386FlagLT_UGT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
-                       b.SetControl(cmp)
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETEQF cmp) yes no)
+               // match: (LT (FlagGT_ULT) yes no)
                // cond:
-               // result: (EQF  cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETEQF {
+                       if v.Op != Op386FlagGT_ULT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386EQF
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If (SETNEF cmp) yes no)
+               // match: (LT (FlagGT_UGT) yes no)
                // cond:
-               // result: (NEF  cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETNEF {
+                       if v.Op != Op386FlagGT_UGT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NEF
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If cond yes no)
+       case Block386NE:
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (NE (TESTB cond cond) yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       _ = v
-                       cond := b.Control
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NE
-                       v0 := b.NewValue0(v.Pos, Op386TESTB, TypeFlags)
-                       v0.AddArg(cond)
-                       v0.AddArg(cond)
-                       b.SetControl(v0)
+                       b.Kind = Block386LT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-       case Block386LE:
-               // match: (LE (InvertFlags cmp) yes no)
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (GE cmp yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386InvertFlags {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GE
+                       b.Kind = Block386LT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagEQ) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagEQ {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386LE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagLT_ULT) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386LE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386GT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386GT
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LE (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386GE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case Block386LT:
-               // match: (LT (InvertFlags cmp) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (GT cmp yes no)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386InvertFlags {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GT
+                       b.Kind = Block386GE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagEQ) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagEQ {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386EQ
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagLT_ULT) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386EQ
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386NE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386NE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETB {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETB {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386ULT
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case Block386NE:
-               // match: (NE (TESTB (SETL  cmp) (SETL  cmp)) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (LT  cmp yes no)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETL {
+                       if v_0.Op != Op386SETB {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETL {
+                       if v_1.Op != Op386SETB {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14070,27 +18591,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386LT
+                       b.Kind = Block386ULT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (LE  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETLE {
+                       if v_0.Op != Op386SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETLE {
+                       if v_1.Op != Op386SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14098,27 +18619,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386LE
+                       b.Kind = Block386ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETG  cmp) (SETG  cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (GT  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETG {
+                       if v_0.Op != Op386SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETG {
+                       if v_1.Op != Op386SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14126,27 +18647,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GT
+                       b.Kind = Block386ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (GE  cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGE {
+                       if v_0.Op != Op386SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGE {
+                       if v_1.Op != Op386SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14154,27 +18675,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GE
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (EQ  cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETEQ {
+                       if v_0.Op != Op386SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETEQ {
+                       if v_1.Op != Op386SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14182,27 +18703,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386EQ
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
                // cond:
-               // result: (NE  cmp yes no)
+               // result: (UGE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETNE {
+                       if v_0.Op != Op386SETAE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETNE {
+                       if v_1.Op != Op386SETAE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14210,27 +18731,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NE
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETB  cmp) (SETB  cmp)) yes no)
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
                // cond:
-               // result: (ULT cmp yes no)
+               // result: (UGE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETB {
+                       if v_0.Op != Op386SETAE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETB {
+                       if v_1.Op != Op386SETAE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14238,27 +18759,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386ULT
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
-               // result: (ULE cmp yes no)
+               // result: (UGT  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETBE {
+                       if v_0.Op != Op386SETGF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETBE {
+                       if v_1.Op != Op386SETGF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14266,27 +18787,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386ULE
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no)
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
-               // result: (UGT cmp yes no)
+               // result: (UGT  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETA {
+                       if v_0.Op != Op386SETGF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETA {
+                       if v_1.Op != Op386SETGF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14300,21 +18821,21 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
                // cond:
-               // result: (UGE cmp yes no)
+               // result: (UGE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETAE {
+                       if v_0.Op != Op386SETGEF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETAE {
+                       if v_1.Op != Op386SETGEF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14328,21 +18849,21 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no)
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
                // cond:
-               // result: (UGT  cmp yes no)
+               // result: (UGE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGF {
+                       if v_0.Op != Op386SETGEF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGF {
+                       if v_1.Op != Op386SETGEF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14350,27 +18871,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
+               // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no)
                // cond:
-               // result: (UGE  cmp yes no)
+               // result: (EQF  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGEF {
+                       if v_0.Op != Op386SETEQF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGEF {
+                       if v_1.Op != Op386SETEQF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14378,7 +18899,7 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
+                       b.Kind = Block386EQF
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
@@ -14440,6 +18961,34 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (InvertFlags cmp) yes no)
                // cond:
                // result: (NE cmp yes no)
index e31d3b453a8324affa7a089b5021d21a1cdd026a..fe4b7a0c00696080a8b5e7ea3fb1cef05a6ba7d6 100644 (file)
@@ -858,9 +858,9 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (ADDL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
@@ -872,10 +872,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
@@ -883,34 +884,35 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
+       // match: (ADDL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -923,13 +925,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
@@ -937,38 +937,36 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ADDL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -981,13 +979,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
@@ -995,32 +991,30 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (ADDL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -1039,6 +1033,21 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (NEGL y) x)
+       // cond:
+       // result: (SUBL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x l:(MOVLload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
        // result: (ADDLmem x [off] {sym} ptr mem)
@@ -1194,9 +1203,9 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
+       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLQconst {
@@ -1208,10 +1217,11 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
@@ -1219,28 +1229,29 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
+       // match: (ADDQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -1262,6 +1273,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (SHLQconst [2] y))
        // cond:
        // result: (LEAQ4 x y)
@@ -1280,6 +1309,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [2] y) x)
+       // cond:
+       // result: (LEAQ4 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (SHLQconst [1] y))
        // cond:
        // result: (LEAQ2 x y)
@@ -1298,6 +1345,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (ADDQ y y))
        // cond:
        // result: (LEAQ2 x y)
@@ -1316,6 +1381,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (ADDQ y y) x)
+       // cond:
+       // result: (LEAQ2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (ADDQ x y))
        // cond:
        // result: (LEAQ2 y x)
@@ -1352,6 +1435,42 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADDQ (ADDQ x y) x)
+       // cond:
+       // result: (LEAQ2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDQ (ADDQ y x) x)
+       // cond:
+       // result: (LEAQ2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
        // match: (ADDQ (ADDQconst [c] x) y)
        // cond:
        // result: (LEAQ1 [c] x y)
@@ -1369,17 +1488,17 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (ADDQconst [c] y))
+       // match: (ADDQ y (ADDQconst [c] x))
        // cond:
        // result: (LEAQ1 [c] x y)
        for {
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
+               x := v_1.Args[0]
                v.reset(OpAMD64LEAQ1)
                v.AuxInt = c
                v.AddArg(x)
@@ -1408,7 +1527,7 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ (LEAQ [c] {s} x) y)
+       // match: (ADDQ (LEAQ [c] {s} y) x)
        // cond: x.Op != OpSB && y.Op != OpSB
        // result: (LEAQ1 [c] {s} x y)
        for {
@@ -1418,8 +1537,8 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
@@ -1445,6 +1564,21 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (NEGQ y) x)
+       // cond:
+       // result: (SUBQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x l:(MOVQload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
        // result: (ADDQmem x [off] {sym} ptr mem)
@@ -3356,20 +3490,20 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+d)   && y.Op != OpSB
+       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
+       // cond: is32Bit(c+d)   && x.Op != OpSB
        // result: (LEAQ1 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+d) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
                v.reset(OpAMD64LEAQ1)
@@ -3401,9 +3535,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [1] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
        // cond:
-       // result: (LEAQ2 [c] {s} y x)
+       // result: (LEAQ2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3414,13 +3548,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 1 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ2)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
@@ -3445,9 +3579,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [2] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
        // cond:
-       // result: (LEAQ4 [c] {s} y x)
+       // result: (LEAQ4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3458,13 +3592,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 2 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ4)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
@@ -3489,9 +3623,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [3] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
        // cond:
-       // result: (LEAQ8 [c] {s} y x)
+       // result: (LEAQ8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3502,13 +3636,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 3 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ8)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
@@ -3535,21 +3669,21 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
        // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
                v.reset(OpAMD64LEAQ1)
@@ -4193,7 +4327,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -4216,7 +4350,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -4290,7 +4424,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -4314,7 +4448,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -4362,6 +4496,28 @@ func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond:
        // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
@@ -4384,6 +4540,28 @@ func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
@@ -4431,7 +4609,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -4479,7 +4657,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -4559,7 +4737,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
        // cond: x0.Uses == 1   && clobber(x0)
        // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
        for {
@@ -4605,7 +4783,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)   x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)   x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
        // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
        for {
@@ -4696,7 +4874,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)   x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)   x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)   x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)   x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)   x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)   x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
        // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
        for {
@@ -4970,7 +5148,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -4996,7 +5174,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -5304,6 +5482,30 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstoreidx1 [c] {sym} idx (ADDQconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond:
        // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
@@ -5328,7 +5530,82 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
+       // match: (MOVBstoreidx1 [c] {sym} (ADDQconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1   && clobber(x0)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_2.AuxInt != 8 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} idx p (SHRWconst [8] w) mem))
        // cond: x0.Uses == 1   && clobber(x0)
        // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
        for {
@@ -5347,6 +5624,57 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                if x0.Aux != s {
                        break
                }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_2.AuxInt != 8 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1   && clobber(x0)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
                if p != x0.Args[0] {
                        break
                }
@@ -5379,7 +5707,58 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)   x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)   x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // match: (MOVBstoreidx1 [i] {s} idx p w x0:(MOVBstoreidx1 [i-1] {s} idx p (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1   && clobber(x0)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_2.AuxInt != 8 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
        // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
@@ -5481,124 +5860,122 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)   x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)   x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)   x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)   x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)   x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)   x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
-       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
                w := v.Args[2]
-               x6 := v.Args[3]
-               if x6.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x6.AuxInt != i-1 {
-                       break
-               }
-               if x6.Aux != s {
-                       break
-               }
-               if p != x6.Args[0] {
-                       break
-               }
-               if idx != x6.Args[1] {
-                       break
-               }
-               x6_2 := x6.Args[2]
-               if x6_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if x6_2.AuxInt != 8 {
-                       break
-               }
-               if w != x6_2.Args[0] {
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               x5 := x6.Args[3]
-               if x5.Op != OpAMD64MOVBstoreidx1 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               if x5.AuxInt != i-2 {
+               if x2.Aux != s {
                        break
                }
-               if x5.Aux != s {
+               if p != x2.Args[0] {
                        break
                }
-               if p != x5.Args[0] {
+               if idx != x2.Args[1] {
                        break
                }
-               if idx != x5.Args[1] {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               x5_2 := x5.Args[2]
-               if x5_2.Op != OpAMD64SHRQconst {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               if x5_2.AuxInt != 16 {
+               if w != x2_2.Args[0] {
                        break
                }
-               if w != x5_2.Args[0] {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               x4 := x5.Args[3]
-               if x4.Op != OpAMD64MOVBstoreidx1 {
+               if x1.AuxInt != i-2 {
                        break
                }
-               if x4.AuxInt != i-3 {
+               if x1.Aux != s {
                        break
                }
-               if x4.Aux != s {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x4.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if idx != x4.Args[1] {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               x4_2 := x4.Args[2]
-               if x4_2.Op != OpAMD64SHRQconst {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               if x4_2.AuxInt != 24 {
+               if w != x1_2.Args[0] {
                        break
                }
-               if w != x4_2.Args[0] {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               x3 := x4.Args[3]
-               if x3.Op != OpAMD64MOVBstoreidx1 {
+               if x0.AuxInt != i-3 {
                        break
                }
-               if x3.AuxInt != i-4 {
+               if x0.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if idx != x3.Args[1] {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               x3_2 := x3.Args[2]
-               if x3_2.Op != OpAMD64SHRQconst {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               if x3_2.AuxInt != 32 {
+               if w != x0_2.Args[0] {
                        break
                }
-               if w != x3_2.Args[0] {
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               x2 := x3.Args[3]
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
                if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x2.AuxInt != i-5 {
+               if x2.AuxInt != i-1 {
                        break
                }
                if x2.Aux != s {
@@ -5611,10 +5988,10 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                        break
                }
                x2_2 := x2.Args[2]
-               if x2_2.Op != OpAMD64SHRQconst {
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if x2_2.AuxInt != 40 {
+               if x2_2.AuxInt != 8 {
                        break
                }
                if w != x2_2.Args[0] {
@@ -5624,23 +6001,23 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x1.AuxInt != i-6 {
+               if x1.AuxInt != i-2 {
                        break
                }
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                x1_2 := x1.Args[2]
-               if x1_2.Op != OpAMD64SHRQconst {
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if x1_2.AuxInt != 48 {
+               if x1_2.AuxInt != 16 {
                        break
                }
                if w != x1_2.Args[0] {
@@ -5650,7 +6027,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x0.AuxInt != i-7 {
+               if x0.AuxInt != i-3 {
                        break
                }
                if x0.Aux != s {
@@ -5663,8268 +6040,75795 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                        break
                }
                x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRQconst {
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if x0_2.AuxInt != 56 {
+               if x0_2.AuxInt != 24 {
                        break
                }
                if w != x0_2.Args[0] {
                        break
                }
                mem := x0.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 7
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if v_2.AuxInt != 8 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
+               if x2.Aux != s {
                        break
                }
-               if x.AuxInt != i-1 {
+               if p != x2.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if idx != x2.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if idx != x.Args[1] {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               if w != x.Args[2] {
+               if w != x2_2.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if x1.AuxInt != i-2 {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
+               if x1.Aux != s {
                        break
                }
-               if x.AuxInt != i-1 {
+               if idx != x1.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if p != x1.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if idx != x.Args[1] {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if w != x1_2.Args[0] {
                        break
                }
-               if w0.AuxInt != j-8 {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if w != w0.Args[0] {
+               if x0.AuxInt != i-3 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
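[Note: the block above and the near-identical blocks that follow are the commutative expansions of a single source-level rule. MOVBstoreidx1 accepts its two address operands p and idx in either order, so the rule generator emits one match block per ordering at every nesting level. A minimal sketch of the rule these blocks expand from, assuming the canonical p-before-idx spelling in AMD64.rules:

(MOVBstoreidx1 [i] {s} p idx w
  x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
  x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
  x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && clobber(x0) && clobber(x1) && clobber(x2)
  -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)

In effect, four single-byte stores that lay w down most-significant byte first collapse into one 32-bit store of the byte-swapped word.]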
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               if x2.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x2.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQSX (ANDLconst [c] x))
-       // cond: c & 0x80000000 == 0
-       // result: (ANDLconst [c & 0x7fffffff] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if p != x2.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x80000000 == 0) {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fffffff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVLQSX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLQSX {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVWQSX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWQSX {
+               if w != x2_2.Args[0] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVBQSX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBQSX {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool {
-       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x1.AuxInt != i-2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               if p != x1.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLloadidx1 {
+               if w != x1_2.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLloadidx4 {
+               if x0.AuxInt != i-3 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLQZX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLQZX {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVWQZX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWQZX {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVBQZX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBQZX {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
-       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if w != x0_2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
-       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLstore {
+               if x2.Aux != s {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if idx != x2.Args[0] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x2.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x2_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x1.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if idx != x1.Args[1] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if w != x1_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x0.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool {
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if x0.Aux != s {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if idx != x0.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx4(v *Value) bool {
-       // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
-       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
-       // cond:
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLQSX {
+               if x2.AuxInt != i-1 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
-       // cond:
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLQZX {
+               if x2.Aux != s {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if idx != x2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if p != x2.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if w != x2_2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if x1.AuxInt != i-2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if idx != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               if w != x1_2.Args[0] {
                        break
                }
-               if v_1.AuxInt != 32 {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstore {
+               if x0.AuxInt != i-3 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x0.Aux != s {
                        break
                }
-               if x.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != x.Args[1] {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 4
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
-               v.AddArg(w)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w0 mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstore {
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               if x.Aux != s {
+               if x2.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x2.Args[0] {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst {
+               if p != x2.Args[1] {
                        break
                }
-               if w0.AuxInt != j-32 {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if w != w0.Args[0] {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if w != x2_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2)) {
+               if x1.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x1.Args[1] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if w != x1_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if x0.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if x0.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if p != x0.Args[1] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64MOVLstoreconst {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               if p != x.Args[0] {
+               if w != x0_2.Args[0] {
                        break
                }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = ValAndOff(a).Off()
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
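[Note: all of these permutations recognize the same store pattern. A hedged, purely illustrative Go snippet whose SSA can take this shape — an explicit big-endian 4-byte store, the byte layout encoding/binary's BigEndian.PutUint32 uses (the function and names below are hypothetical, not part of this CL):

// putUint32BE writes w most-significant byte first. The matchers above fold
// the four indexed byte stores (w>>24, w>>16, w>>8, w) into a single
// BSWAPL + MOVLstoreidx1, whichever order the pointer and index SSA values
// happen to appear in.
func putUint32BE(b []byte, i int, w uint32) {
        b[i+0] = byte(w >> 24)
        b[i+1] = byte(w >> 16)
        b[i+2] = byte(w >> 8)
        b[i+3] = byte(w)
}
]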
-       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if x2.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if p != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if idx != x2.Args[1] {
                        break
                }
-               if v_1.AuxInt != 2 {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if w != x2_2.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstoreconstidx1 {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if x1.AuxInt != i-2 {
                        break
                }
-               if p != x.Args[0] {
+               if x1.Aux != s {
                        break
                }
-               if i != x.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
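The MOVLstoreconstidx rules deleted above fold ADDQconst adjustments into the store's offset and merge two adjacent 4-byte constant stores into a single 8-byte store. The constant packing they rely on is easy to check in isolation; the snippet below is a standalone illustration (not code from this CL) of the ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 expression in the replaced blocks.

package main

import "fmt"

func main() {
	lo := int64(0x11223344) // constant stored at the lower offset a
	hi := int64(0x55667788) // constant stored 4 bytes above, at offset c = a+4
	merged := lo&0xffffffff | hi<<32
	fmt.Printf("%#x\n", merged) // 0x5566778811223344: the low word sits at the low address
}

The store at the lower offset supplies the low 32 bits and the store 4 bytes above it supplies the high 32 bits, matching little-endian memory layout.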
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(4 * c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstoreconstidx4 {
+               if w != x1_2.Args[0] {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if p != x.Args[0] {
+               if x0.AuxInt != i-3 {
                        break
                }
-               if i != x.Args[1] {
+               if x0.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 2
-               v0.AddArg(i)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
-               v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v1)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value) bool {
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if idx != x0.Args[1] {
                        break
                }
-               if v_1.AuxInt != 2 {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
                v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if v_2.AuxInt != 32 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-4 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               if p != x.Args[0] {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               if x2.AuxInt != i-1 {
                        break
                }
-               if w != x.Args[2] {
+               if x2.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if idx != x2.Args[1] {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               if w != x2_2.Args[0] {
                        break
                }
-               if p != x.Args[0] {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               if x1.AuxInt != i-2 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if x1.Aux != s {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if p != x1.Args[0] {
                        break
                }
-               if w != w0.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if w != x1_2.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if v_2.AuxInt != 32 {
+               if x0.AuxInt != i-3 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx4 {
+               if x0.Aux != s {
                        break
                }
-               if x.AuxInt != i-4 {
+               if idx != x0.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if p != x0.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               if idx != x.Args[1] {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               if w != x.Args[2] {
+               if w != x0_2.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
                v.AddArg(v0)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
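These new MOVBstoreidx1 blocks recognize four adjacent single-byte stores of the same 32-bit value, most significant byte at the lowest address, and replace them with one MOVLstoreidx1 of BSWAPL(w). As a rough sketch (not taken from this CL; the function name is made up), the Go code below has the shape of source these rules target, a hand-written big-endian 32-bit store:

func putUint32BE(b []byte, w uint32) {
	_ = b[3] // bounds-check hint: one check for all four stores
	b[0] = byte(w >> 24)
	b[1] = byte(w >> 16)
	b[2] = byte(w >> 8)
	b[3] = byte(w)
}

After the merge, the store side is a byte swap plus a single 4-byte store instead of four separate byte stores.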
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx4 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x2.Aux != s {
                        break
                }
-               if x.Aux != s {
+               if p != x2.Args[0] {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x2.Args[1] {
                        break
                }
-               if idx != x.Args[1] {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if w != x2_2.Args[0] {
                        break
                }
-               if w != w0.Args[0] {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x1.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x1_2.AuxInt != 16 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-3 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
                v.AddArg(v0)
-               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
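This block and the neighboring ones are not independent rules: they are the argument-order permutations of a single source rule, emitted automatically now that the rule generator knows the pointer and index operands of MOVBstoreidx1 commute. Each copy differs only in which of p and idx appears first at some nesting level. A loose sketch of the expansion idea, assuming nothing about the actual rulegen.go code (argOrders is a made-up name):

// For a two-operand commutative op the generator can simply emit the same
// match body once per argument order; nested commutative ops multiply the
// number of variants.
func argOrders(commutative bool, a, b string) [][2]string {
	if !commutative {
		return [][2]string{{a, b}}
	}
	return [][2]string{{a, b}, {b, a}}
}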
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
-       // match: (MOVOload  [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVOload  [off1+off2] {sym} ptr mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVOload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVOload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
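The MOVOload rules deleted above (and the similar ones that follow for other widths) fold a constant address adjustment, an ADDQconst or a LEAQ with a mergeable symbol, into the load's own displacement as long as the sum still fits in 32 bits. A hedged sketch of the kind of source that benefits (struct and field names invented for illustration):

type pair struct{ x, y int64 }

// The 8-byte offset of y folds into the load's displacement, so the access
// needs no separate address arithmetic.
func getY(p *pair) int64 {
	return p.y
}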
-func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
-       // match: (MOVOstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVOstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if idx != x2.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x2_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
-       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x1.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
-       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQstore {
+               if p != x1.Args[1] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVQload  [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if w != x1_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x0.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if p != x0.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVQloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if w != x0_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if idx != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
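The first MOVQload rule removed above is store-to-load forwarding: a load with the same symbol, offset, and pointer as an immediately preceding store of x simply becomes x. A minimal sketch of the pattern it eliminates (illustration only, not from this CL):

// The reload through p can be replaced by x itself; only the store remains.
func roundTrip(p *uint64, x uint64) uint64 {
	*p = x
	return *p
}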
-func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
-       // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // cond:
-       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if p != x2.Args[1] {
                        break
                }
-               if v_1.AuxInt != 3 {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if w != x2_2.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool {
-       // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if x1.AuxInt != i-2 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
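The MOVQloadidx1/MOVQloadidx8 rules removed above pick addressing modes: a shift-by-3 on the index becomes the *8 scale of an indexed load, and constant adjustments to either operand fold into the displacement. A rough sketch of code that ends up in the scaled form, assuming the usual lowering of slice indexing:

// Indexing a []uint64 scales the index by 8; the idx8 form carries that scale
// in the addressing mode instead of computing a separate shift.
func elem(s []uint64, i int) uint64 {
	return s[i]
}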
-func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
-       // match: (MOVQstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if idx != x1.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x1_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x0.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if idx != x0.Args[1] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if idx != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
-       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x2.Args[1] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x2_2.AuxInt != 8 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if w != x2_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if x1.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if x1.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // cond:
-       // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if idx != x1.Args[1] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if x1_2.AuxInt != 16 {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if w != x1_2.Args[0] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
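The MOVQstoreconst rules removed above carry both a constant value and a displacement in the single AuxInt field via ValAndOff. The snippet below illustrates the packing idea only; the real helpers live in ssa/rewrite.go and their exact layout should be taken from there (valAndOff and makeVO are made-up names).

type valAndOff int64

// Pack a 32-bit value and a 32-bit offset into one int64 (illustrative layout).
func makeVO(val, off int32) valAndOff { return valAndOff(int64(val)<<32 | int64(uint32(off))) }

func (x valAndOff) val() int64 { return int64(x) >> 32 }
func (x valAndOff) off() int64 { return int64(int32(x)) }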
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v *Value) bool {
-       // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // cond:
-       // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if x0.AuxInt != i-3 {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if x0.Aux != s {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if p != x0.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v *Value) bool {
-       // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if x0_2.AuxInt != 24 {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(8 * c)
-               v.Aux = sym
-               v.AddArg(ptr)
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
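As an aside, the following minimal, standalone Go sketch (not part of the compiler and not part of this diff; all names are made up for illustration) shows the memory equivalence these MOVBstoreidx1 -> MOVLstoreidx1 rewrites rely on: storing w>>24, w>>16, w>>8 and w at descending byte offsets leaves the same bytes in memory as a single little-endian 32-bit store of the byte-swapped value, which is what BSWAPL computes.

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func main() {
	const w uint32 = 0x11223344

	// Byte-at-a-time stores, as matched by the MOVBstoreidx1 chain:
	// offsets i-3, i-2, i-1, i hold w>>24, w>>16, w>>8, w.
	var a [4]byte
	a[0] = byte(w >> 24)
	a[1] = byte(w >> 16)
	a[2] = byte(w >> 8)
	a[3] = byte(w)

	// One 32-bit little-endian store of the byte-swapped value,
	// as produced by the (MOVLstoreidx1 ... (BSWAPL <w.Type> w) mem) result.
	var b [4]byte
	binary.LittleEndian.PutUint32(b[:], bits.ReverseBytes32(w))

	fmt.Println(a == b) // prints: true
}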
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstoreidx1(v *Value) bool {
-       // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // cond:
-       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x2_2.AuxInt != 8 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x1_2.AuxInt != 16 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-3 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p w x2:(MOVBstoreidx1 [i-1] {s} idx p (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} idx p (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} idx p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x2.AuxInt != i-1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x2_2.AuxInt != 8 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x1_2.AuxInt != 16 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-3 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
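The MOVBstoreidx1 match blocks above differ only in which p/idx argument ordering they accept; the cond and result are identical in each. Reflowed from their match/cond/result comments into rule notation, the underlying four-byte combining rule reads roughly as follows (a sketch in canonical p/idx order; spacing and line breaks may differ from the actual AMD64.rules entry):

(MOVBstoreidx1 [i] {s} p idx w
  x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
  x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
  x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && clobber(x0) && clobber(x1) && clobber(x2)
  -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)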
-       // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
                v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
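The eight-byte variant follows the same pattern: the block above matches the canonical p/idx ordering, and the blocks after it accept other orderings while sharing the same cond and result. Reflowed from the match/cond/result comments, the underlying rule reads roughly:

(MOVBstoreidx1 [i] {s} p idx w
  x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
  x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
  x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
  x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
  x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
  x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
  x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1
  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  && clobber(x4) && clobber(x5) && clobber(x6)
  -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)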
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstoreidx8(v *Value) bool {
-       // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if x6.AuxInt != i-1 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
-       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
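Each of the cases above and below is one machine-generated ordering of the same source rule: MOVBstoreidx1 takes a commutative (p, idx) address pair, so the generator emits one match per argument ordering. As a rough illustration only (this is not the rulegen code; the orderings helper below is hypothetical), the orderings for n commutative ops can be enumerated with an n-bit mask:

    package main

    import "fmt"

    // orderings is a hypothetical stand-in for what the rule generator does internally:
    // given n commutative ops, each with a pair of arguments, emit one argument order
    // per bit pattern of an n-bit mask (2^n orderings in total).
    func orderings(pairs [][2]string) [][]string {
            n := len(pairs)
            var out [][]string
            for mask := 0; mask < 1<<uint(n); mask++ {
                    one := make([]string, 0, 2*n)
                    for i, p := range pairs {
                            x, y := p[0], p[1]
                            if mask&(1<<uint(i)) != 0 {
                                    x, y = y, x // swap the commutative arguments
                            }
                            one = append(one, x, y)
                    }
                    out = append(out, one)
            }
            return out
    }

    func main() {
            // Three commutative (p, idx) address pairs give 8 orderings; the MOVBstoreidx1
            // cases in this file are orderings of exactly this kind, one match per ordering.
            for _, o := range orderings([][2]string{{"p", "idx"}, {"p", "idx"}, {"p", "idx"}}) {
                    fmt.Println(o)
            }
    }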
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
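The rewrite these cases perform relies on a simple equivalence: storing the bytes of w one at a time, with the high byte at the lowest address, leaves the same memory contents as a single little-endian 8-byte store of the byte-swapped value, which is what the resulting (MOVQstoreidx1 [i-7] ... (BSWAPQ <w.Type> w) mem) does on AMD64. A minimal stand-alone check of that equivalence (illustrative only, not part of the compiler):

    package main

    import (
            "bytes"
            "encoding/binary"
            "fmt"
            "math/bits"
    )

    func main() {
            const w uint64 = 0x1122334455667788
            const i = 7 // byte offsets i, i-1, ..., i-7, as in the matched pattern

            // Eight one-byte stores: offset i gets the low byte of w, offset i-k gets w>>(8k).
            a := make([]byte, 8)
            for k := uint(0); k <= 7; k++ {
                    a[i-k] = byte(w >> (8 * k))
            }

            // One 8-byte store of the byte-swapped value at offset i-7, which is what the
            // rewritten MOVQstoreidx1 of (BSWAPQ w) performs on little-endian AMD64.
            b := make([]byte, 8)
            binary.LittleEndian.PutUint64(b[i-7:], bits.ReverseBytes64(w))

            fmt.Println(bytes.Equal(a, b)) // true
    }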
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
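All of the MOVBstoreidx1 cases in this stretch match the same byte-reversed store chain; they differ only in which of the two address slots holds p and which holds idx in each inner store. As a minimal standalone sketch (illustrative only; byteByByte and combined are hypothetical helpers, not part of the generated file), the following Go program checks the equivalence the result relies on: eight single-byte stores of w's bytes in big-endian order leave the same memory image as one little-endian 8-byte store of the byte-swapped value, which is what (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem) performs on amd64.

	package main

	import (
		"encoding/binary"
		"fmt"
		"math/bits"
	)

	// byteByByte plays the role of the matched chain: buf stands in for the
	// memory at p+idx+i-7, and buf[k] receives byte (w >> (56-8k)), exactly as
	// the MOVBstoreidx1 values at offsets i-7 .. i do.
	func byteByByte(buf []byte, w uint64) {
		for k := uint(0); k < 8; k++ {
			buf[k] = byte(w >> (56 - 8*k))
		}
	}

	// combined plays the role of the rewrite result: a single little-endian
	// 8-byte store of the byte-swapped value (BSWAPQ feeding MOVQstoreidx1).
	func combined(buf []byte, w uint64) {
		binary.LittleEndian.PutUint64(buf, bits.ReverseBytes64(w))
	}

	func main() {
		const w = 0x0102030405060708
		a := make([]byte, 8)
		b := make([]byte, 8)
		byteByByte(a, w)
		combined(b, w)
		fmt.Printf("%% x\n%% x\n", a, b) // both lines print: 01 02 03 04 05 06 07 08
	}

Running the sketch prints identical byte sequences for both helpers, which is the property that lets the matcher collapse the whole chain into one MOVQstoreidx1 regardless of how p and idx are ordered in each inner store.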
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
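+       // The nearly identical match bodies above and below are machine-generated
+       // variants of a single source rule: the pointer and index arguments of
+       // MOVBstoreidx1 are commutative, so rulegen emits one body per p/idx
+       // ordering in the nested stores. A rough sketch of the underlying rule in
+       // AMD64.rules (assumed layout; conditions abbreviated with "..."):
+       //
+       //   (MOVBstoreidx1 [i] {s} p idx w
+       //     x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
+       //     x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
+       //     x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
+       //     x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
+       //     x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
+       //     x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
+       //     x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       //     && x0.Uses == 1 && ... && x6.Uses == 1 && clobber(x0) && ... && clobber(x6)
+       //     -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)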
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
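[Editorial note, not part of the committed file: the cases in this function differ only in which of the commuting p/idx argument orders they accept; the store arithmetic is identical in each. A minimal stand-alone sketch of that arithmetic follows, assuming a little-endian target as on AMD64. The names buf/i/w are illustrative only. Storing the bytes w>>0, w>>8, ..., w>>56 at offsets i, i-1, ..., i-7 leaves the same memory image as a single 8-byte little-endian store of the byte-swapped value at offset i-7, which is why the result form is (MOVQstoreidx1 [i-7] ... (BSWAPQ <w.Type> w) mem).

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	const i = 7 // lowest offset usable here so that i-7 >= 0
	w := uint64(0x1122334455667788)

	// Eight single-byte stores, mirroring the matched MOVBstoreidx1 chain:
	// the k-th store in the chain writes byte(w >> (8*k)) at offset i-k.
	a := make([]byte, 8)
	for k := 0; k < 8; k++ {
		a[i-k] = byte(w >> (8 * uint(k)))
	}

	// One little-endian 8-byte store of the byte-swapped value at offset i-7,
	// mirroring the rewritten MOVQstoreidx1/BSWAPQ form.
	swapped := ((w & 0x00000000000000FF) << 56) |
		((w & 0x000000000000FF00) << 40) |
		((w & 0x0000000000FF0000) << 24) |
		((w & 0x00000000FF000000) << 8) |
		((w & 0x000000FF00000000) >> 8) |
		((w & 0x0000FF0000000000) >> 24) |
		((w & 0x00FF000000000000) >> 40) |
		((w & 0xFF00000000000000) >> 56)
	b := make([]byte, 8)
	binary.LittleEndian.PutUint64(b[i-7:], swapped)

	fmt.Println("same memory image:", bytes.Equal(a, b)) // prints true
}

Running this prints "same memory image: true", confirming the equivalence the match/result comments above encode.]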
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
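The MOVBstoreidx1 cases that follow are machine-generated commutative permutations of one byte-reversed-store rule: MOVBstoreidx1 is commutative in its two address operands, so the rule generator emits a separate match for each p/idx ordering in the chain. Written canonically (every store as "p idx"), the underlying source rule presumably looks like this, with the single-use and clobber conditions on x0..x6 given in the cond comments:

(MOVBstoreidx1 [i] {s} p idx w
  x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
  x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
  x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
  x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
  x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
  x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
  x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
  -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)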
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
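For readers tracing the generated cases above and below: each one recognizes a chain of eight single-byte indexed stores that lay down the bytes of w from most-significant at offset [i-7] to least-significant at offset [i], and replaces the chain with a single 8-byte store of (BSWAPQ w). A minimal Go-level sketch of roughly the kind of source pattern such a chain comes from (illustrative only; storeBE64 is a hypothetical helper, not code from this CL):

	// storeBE64 writes w into b in big-endian byte order.
	// b[0] corresponds to the innermost store at [i-7],
	// b[7] to the outermost store at [i].
	func storeBE64(b []byte, w uint64) {
		b[0] = byte(w >> 56)
		b[1] = byte(w >> 48)
		b[2] = byte(w >> 40)
		b[3] = byte(w >> 32)
		b[4] = byte(w >> 24)
		b[5] = byte(w >> 16)
		b[6] = byte(w >> 8)
		b[7] = byte(w)
	}

Storing BSWAPQ(w) with one little-endian 64-bit store produces the same big-endian byte layout as the eight separate byte stores, which is what the replacement (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem) relies on.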
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
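+       // Illustrative sketch (an assumption added for exposition, not rulegen output):
+       // the surrounding cases all match the same byte-store/shift chain and differ
+       // only in the p/idx ordering of each nested MOVBstoreidx1, since the two
+       // address arguments of an indexed store commute. With x standing for any of
+       // the nested values x0..x6, each pair of ordering checks is equivalent to a
+       // single order-insensitive test:
+       //
+       //      if !((p == x.Args[0] && idx == x.Args[1]) ||
+       //              (idx == x.Args[0] && p == x.Args[1])) {
+       //              break
+       //      }
+       //
+       // The generated file instead carries one fully specialized case per ordering.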
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x6:(MOVBstoreidx1 [i-1] {s} idx p (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} idx p (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} idx p (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} idx p (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} idx p (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} idx p (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} idx p (SHRQconst [56] w) mem))))))))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               if idx != x6.Args[0] {
+                       break
+               }
+               if p != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               if idx != x5.Args[0] {
+                       break
+               }
+               if p != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               if idx != x4.Args[0] {
+                       break
+               }
+               if p != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               if idx != x3.Args[0] {
+                       break
+               }
+               if p != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
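+	// The eight cases above differ only in the p/idx ordering of the three
+	// innermost byte stores; the rule generator expands such commuted
+	// orderings automatically from a single source rule. Each one recognizes
+	// a 64-bit value w stored one byte at a time with its most significant
+	// byte at the lowest address, and folds the chain into a single
+	// byte-swapped 8-byte store. Roughly the pattern produced by source code
+	// such as the following (hypothetical helper, shown for illustration
+	// only):
+	//
+	//	func putBE(b []byte, j int, w uint64) {
+	//		b[j] = byte(w >> 56)
+	//		b[j+1] = byte(w >> 48)
+	//		// ... and so on, down to ...
+	//		b[j+7] = byte(w)
+	//	}
+	//
+	// which becomes a BSWAPQ of w followed by one MOVQstoreidx1.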
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
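The blocks above differ only in which of the p/idx argument orders they accept; the transformation itself is the usual little-endian store merge: a byte store of w at offset i-1 followed by a byte store of w>>8 at offset i is equivalent to a single 16-bit store of w at i-1. A minimal standalone check of that equivalence (illustrative only, not part of this change):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	w := uint64(0xCAFE)
	// two MOVBstoreidx1-style byte stores: w at offset i-1, w>>8 at offset i
	a := []byte{byte(w), byte(w >> 8)}
	// one MOVWstoreidx1-style little-endian 16-bit store at offset i-1
	b := make([]byte, 2)
	binary.LittleEndian.PutUint16(b, uint16(w))
	fmt.Println(a[0] == b[0] && a[1] == b[1]) // true
}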
+func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQSX (ANDLconst [c] x))
+       // cond: c & 0x80000000 == 0
+       // result: (ANDLconst [c & 0x7fffffff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80000000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fffffff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVLQSX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLQSX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVWQSX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWQSX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVBQSX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
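The ANDLconst rule above drops the sign extension when the mask has bit 31 clear: the masked 32-bit value then has a zero sign bit, so widening it to 64 bits changes nothing. A small property check (illustrative only, not part of the change):

package main

import "fmt"

func main() {
	const c int32 = 0x0000ff00 // bit 31 clear, so c&0x80000000 == 0
	for _, x := range []int32{-1, 0, 123456, 1 << 30} {
		masked := x & c
		// bit 31 of masked is zero, so 32->64 sign extension is the identity,
		// matching the (ANDLconst [c & 0x7fffffff] x) result above
		fmt.Println(int64(masked) == int64(masked)&0x7fffffff) // true
	}
}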
+func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool {
+       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLloadidx4 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVLQZX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLQZX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVWQZX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWQZX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVBQZX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
+       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
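The offset-folding rules above (and in the load/store functions that follow) only fire when the combined displacement still fits in a signed 32-bit immediate, which is what the is32Bit guard checks; the symbol side is handled separately by canMergeSym. A sketch of the displacement part only, with a stand-in helper name (illustrative, not the compiler's code):

package main

import "fmt"

// fitsInInt32 mirrors what a guard like is32Bit needs to establish:
// the folded displacement must survive truncation to a signed 32-bit immediate.
func fitsInInt32(n int64) bool {
	return n == int64(int32(n))
}

func main() {
	off1, off2 := int64(1<<20), int64(8)
	fmt.Println(fitsInInt32(off1 + off2)) // true: fold into one displacement
	fmt.Println(fitsInInt32(1 << 40))     // false: leave the add in place
}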
+func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
+       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool {
+       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
+       // cond:
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLloadidx4(v *Value) bool {
+       // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
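The second rule above folds a constant added to the index into the displacement; because the idx operand of MOVLloadidx4 is scaled by 4, the folded constant has to be scaled as well: ptr + c + 4*(idx+d) = ptr + (c + 4*d) + 4*idx. A quick numeric check (illustrative only):

package main

import "fmt"

func main() {
	ptr, c, idx, d := int64(0x1000), int64(12), int64(7), int64(3)
	before := ptr + c + 4*(idx+d)    // MOVLloadidx4 [c] ptr (ADDQconst [d] idx)
	after := ptr + (c + 4*d) + 4*idx // MOVLloadidx4 [c+4*d] ptr idx
	fmt.Println(before == after) // true
}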
+func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
+       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
+       // cond:
+       // result: (MOVLstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLQSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
+       // cond:
+       // result: (MOVLstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLQZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64MOVLstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
+               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
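The MOVLstoreconst pairing rule above replaces two adjacent 32-bit constant stores with one 64-bit store; on a little-endian target the constant written at the lower offset becomes the low half and the one 4 bytes above it the high half, which is the a&0xffffffff | c<<32 packing in the result. A standalone check (illustrative only; plain int64 values stand in for the ValAndOff-encoded AuxInts):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	aVal, cVal := int64(0x11223344), int64(0x55667788)

	// two 32-bit constant stores at offsets 0 and 4
	buf1 := make([]byte, 8)
	binary.LittleEndian.PutUint32(buf1[0:], uint32(aVal))
	binary.LittleEndian.PutUint32(buf1[4:], uint32(cVal))

	// one 64-bit store of the packed constant at offset 0
	buf2 := make([]byte, 8)
	packed := aVal&0xffffffff | cVal<<32
	binary.LittleEndian.PutUint64(buf2, uint64(packed))

	fmt.Println(string(buf1) == string(buf2)) // true
}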
+func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstoreconstidx1 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
+               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstoreconstidx4 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
+               v0.AuxInt = 2
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, types.UInt64)
+               v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(v1)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value) bool {
+       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (SHLQconst [2] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} idx (ADDQconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} idx p (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} idx p (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} idx p w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} idx p w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx4 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx4 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
+       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVOload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
+       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVOstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
+       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVQloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
+       // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+       // cond:
+       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool {
+       // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
+       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
+       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v *Value) bool {
+       // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v *Value) bool {
+       // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(8 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreidx1(v *Value) bool {
+       // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
+       // cond:
+       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx1 [c] {sym} (SHLQconst [3] idx) ptr val mem)
+       // cond:
+       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx1 [c] {sym} idx (ADDQconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreidx8(v *Value) bool {
+       // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
+       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDloadidx8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDloadidx1(v *Value) bool {
+       // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+       // cond:
+       // result: (MOVSDloadidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDloadidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDloadidx8(v *Value) bool {
+       // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDloadidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDloadidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
+       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v *Value) bool {
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
+       // cond:
+       // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v *Value) bool {
+       // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
+       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSloadidx1(v *Value) bool {
+       // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSloadidx4(v *Value) bool {
+       // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
+       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v *Value) bool {
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v *Value) bool {
+       // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQSX x:(MOVWQSX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWQSX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQSX x:(MOVBQSX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
+       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWloadidx2 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWQZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c & 0xffff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xffff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVWQZX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWQZX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVBQZX _))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVWstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
+       // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (SHLQconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWloadidx2(v *Value) bool {
+       // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
+       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVWQSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVWQZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
+       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64MOVWstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value) bool {
+       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstoreconstidx1 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(2 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstoreconstidx2 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
+               v0.AuxInt = 1
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value) bool {
+       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (SHLQconst [1] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} idx (ADDQconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRQconst [j-16] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRQconst [j-16] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULL(v *Value) bool {
+       // match: (MULL x (MOVLconst [c]))
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULL (MOVLconst [c]) x)
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool {
+       // match: (MULLconst [c] (MULLconst [d] x))
+       // cond:
+       // result: (MULLconst [int64(int32(c * d))] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MULLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = int64(int32(c * d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [int64(int32(c*d))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool {
+       // match: (MULQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (MULQconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (MULQconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULQconst [c] (MULQconst [d] x))
+       // cond: is32Bit(c*d)
+       // result: (MULQconst [c * d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MULQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c * d)) {
+                       break
+               }
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c * d
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [-1] x)
+       // cond:
+       // result: (NEGQ x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64NEGQ)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [0] _)
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULQconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [3] x)
+       // cond:
+       // result: (LEAQ2 x x)
+       for {
+               if v.AuxInt != 3 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [5] x)
+       // cond:
+       // result: (LEAQ4 x x)
+       for {
+               if v.AuxInt != 5 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [7] x)
+       // cond:
+       // result: (LEAQ8 (NEGQ <v.Type> x) x)
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, v.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [9] x)
+       // cond:
+       // result: (LEAQ8 x x)
+       for {
+               if v.AuxInt != 9 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [11] x)
+       // cond:
+       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 11 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [13] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [21] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [25] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 25 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [37] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [41] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [73] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SHLQconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpAMD64SUBQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ1)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c*d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool {
+       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool {
+       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool {
+       // match: (NEGL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool {
+       // match: (NEGQ (MOVQconst [c]))
+       // cond:
+       // result: (MOVQconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -c
+               return true
+       }
+       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
+       // cond: c != -(1<<31)
+       // result: (ADDQconst [-c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               x := v_0_0.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool {
+       // match: (NOTL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool {
+       // match: (NOTQ (MOVQconst [c]))
+       // cond:
+       // result: (MOVQconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORL x (MOVLconst [c]))
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (MOVLconst [c]) x)
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
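+       // The cases below are the word-sized counterparts of the byte merges
+       // above: two adjacent 16-bit indexed loads (i1 == i0+2) combined with
+       // ORL and a 16-bit shift are replaced by a single 32-bit indexed load.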
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
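+       // The cases below handle partial merges inside a larger ORL chain: two
+       // byte loads shifted to adjacent, byte-aligned positions (i1 == i0+1,
+       // j1 == j0+8, j0 % 16 == 0) are combined into one shifted 16-bit load,
+       // while the remaining operand y of the inner ORL is preserved.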
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
+       // match: (ORLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORLconst [c] _)
+       // cond: int32(c)==-1
+       // result: (MOVLconst [-1])
+       for {
+               c := v.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = c | d
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)) x0:(MOVLload [i0] {s} p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
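+       // The following cases combine two adjacent 32-bit indexed loads: an
+       // unshifted MOVLloadidx1 at offset i0 ORed with a SHLQconst [32] of a
+       // MOVLloadidx1 at offset i0+4 becomes a single MOVQloadidx1, provided
+       // x0, x1, and sh each have exactly one use.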
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
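+       // The following cases fold one more byte load into an ORQ chain: two
+       // adjacent MOVBloadidx1 loads at offsets i0 and i0+1, shifted left by j0
+       // and j0+8 (with j0 a multiple of 16), are replaced by a single shifted
+       // MOVWloadidx1, and the remaining operand y is kept.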
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if idx != x1.Args[0] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDloadidx1(v *Value) bool {
-       // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if mem != x1.Args[2] {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
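
The ORQ case above and its commuted twin below are the pairwise little-endian load combining described in the commit message: two adjacent 16-bit indexed loads, shifted into neighboring halves of a wider value and ORed together, collapse into a single 32-bit indexed load (the j0 % 32 == 0 condition keeps the pair aligned inside the larger OR chain). In the generated condition, clobber marks a matched intermediate value dead and always returns true, and mergePoint picks a block where the merged load can live, or nil if there is none. A simplified sketch of the kind of Go code that produces this shape (get32 is an illustrative name, not compiler code):

package main

import (
	"encoding/binary"
	"fmt"
)

// get32 assembles a 32-bit value from two adjacent little-endian
// 16-bit loads using the shift-and-OR shape these ORQ cases match.
// With the pairwise rules, the two 16-bit loads (MOVWloadidx1) and
// the OR collapse into a single 32-bit load (MOVLloadidx1).
func get32(b []byte, i int) uint32 {
	lo := uint32(binary.LittleEndian.Uint16(b[i:]))
	hi := uint32(binary.LittleEndian.Uint16(b[i+2:]))
	return lo | hi<<16
}

func main() {
	b := []byte{0x01, 0x02, 0x03, 0x04}
	fmt.Printf("0x%08x\n", get32(b, 0)) // 0x04030201
}
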
-       // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDloadidx8(v *Value) bool {
-       // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
-       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if idx != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if mem != x1.Args[2] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if sh.AuxInt != 8 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v *Value) bool {
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // cond:
-       // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if p != x0.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if mem != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
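
This case and the commuted one below are the byte-level big-endian variant: the byte at the lower address lands in the high half, so the pair of byte loads plus the shift/OR becomes one 16-bit load followed by ROLWconst [8], which byte-swaps a 16-bit value. A simplified sketch of the source shape (readBE16 is illustrative only):

package main

import "fmt"

// readBE16 shows the byte-level big-endian shape matched here: the
// byte at the lower offset ends up in the high half, so the two byte
// loads and the shift/OR become a single 16-bit load followed by
// ROLWconst [8].
func readBE16(b []byte, i int) uint16 {
	return uint16(b[i])<<8 | uint16(b[i+1])
}

func main() {
	fmt.Printf("0x%04x\n", readBE16([]byte{0xab, 0xcd}, 0)) // 0xabcd
}
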
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v *Value) bool {
-       // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
-       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x0.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if mem != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if r0.AuxInt != 8 {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if r1.AuxInt != 8 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSloadidx1(v *Value) bool {
-       // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // cond:
-       // result: (MOVSSloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if p != x1.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if mem != x1.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSloadidx4(v *Value) bool {
-       // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
-       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if mem != x0.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
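
The same pairing extends one level up in the two cases here: two byte-swapped 32-bit loads of adjacent addresses, combined with a 32-bit shift and an OR, become a single 64-bit load plus BSWAPQ. A simplified sketch of the source-level shape, assuming the half at the lower address is the more significant one (readBE64 is an illustrative name):

package main

import (
	"encoding/binary"
	"fmt"
)

// readBE64 builds a big-endian 64-bit value from two adjacent
// big-endian 32-bit reads. These cases fold the two byte-swapped
// 32-bit loads and the shift/OR into one 64-bit load plus BSWAPQ.
func readBE64(b []byte, i int) uint64 {
	hi := uint64(binary.BigEndian.Uint32(b[i:]))
	lo := uint64(binary.BigEndian.Uint32(b[i+4:]))
	return hi<<32 | lo
}

func main() {
	b := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	fmt.Printf("0x%016x\n", readBE64(b, 0)) // 0x0102030405060708
}
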
-       // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))) r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v *Value) bool {
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // cond:
-       // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               if v_1.AuxInt != 2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v *Value) bool {
-       // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               if mem != x1.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
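
The remaining cases handle byte pairs that sit inside a longer OR chain: the pair at offsets i0 and i0+1, with shift counts j0 and j1 = j0-8, folds into a shifted, byte-swapped 16-bit load while the rest of the chain (y) is kept, so a full big-endian 32- or 64-bit read is assembled a pair at a time. A simplified sketch of such a read (readBE32 is illustrative only):

package main

import "fmt"

// readBE32 is the classic big-endian byte-at-a-time read. The pairwise
// rules assemble it in steps: adjacent byte loads become rotated
// 16-bit loads, and those are then merged into a byte-swapped 32-bit
// load (MOVLload + BSWAPL).
func readBE32(b []byte, i int) uint32 {
	return uint32(b[i])<<24 | uint32(b[i+1])<<16 | uint32(b[i+2])<<8 | uint32(b[i+3])
}

func main() {
	fmt.Printf("0x%08x\n", readBE32([]byte{0xde, 0xad, 0xbe, 0xef}, 0)) // 0xdeadbeef
}
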
-       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX x:(MOVWQSX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWQSX {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX x:(MOVBQSX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBQSX {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
-       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVWQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               if p != x0.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWloadidx1 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWloadidx2 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type)
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWQZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c & 0xffff] x)
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xffff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVWQZX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWQZX {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVBQZX _))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBQZX {
+               if sh.AuxInt != 8 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWstore {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if sh.AuxInt != 8 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if idx != x0.Args[1] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
-       // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if idx != x0.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWloadidx2(v *Value) bool {
-       // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
-       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWQSX {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWQZX {
+               if sh.AuxInt != 8 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if mem != x0.Args[2] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if sh.AuxInt != 8 {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 16 {
+               if sh.AuxInt != 8 {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x.AuxInt != i-2 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if w != x.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if w0.AuxInt != j-16 {
+               if p != x1.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2)) {
+               if sh.AuxInt != 8 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
-       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if p != x1.Args[1] {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
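        // The block above fuses two adjacent MOVBloadidx1 loads (i1 == i0+1) whose bytes
        // end up in big-endian order into a single MOVWloadidx1 followed by ROLWconst [8],
        // which swaps the two bytes of the 16-bit result. mergePoint(b, x0, x1) selects a
        // block dominating both loads to host the new value, and the Uses == 1 and
        // clobber conditions allow the original loads to be removed as dead code.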
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if r1.AuxInt != 8 {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if sh.AuxInt != 16 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64MOVWstoreconst {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if p != x0.Args[0] {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if mem != x0.Args[2] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
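        // Worked example for the block above: if the four bytes at p+idx+i0 are
        // 0x12 0x34 0x56 0x78, the matched tree computes
        // r1 | r0<<16 = 0x5678 | 0x1234<<16 = 0x12345678, and the replacement
        // BSWAPL (MOVLloadidx1 [i0] ...) byte-swaps the little-endian load
        // 0x78563412 back to 0x12345678, using one 32-bit load in place of two
        // 16-bit loads.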
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value) bool {
-       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
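        // The following ORQ blocks cover the remaining operand orderings of the same
        // pattern: ORQ is commutative, and MOVWloadidx1 accepts its pointer and index in
        // either position, so the match code differs only in which Args slot p, idx, sh,
        // r0 and r1 are read from; the condition and result are identical in every variant.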
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstoreconstidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x.Args[0] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if i != x.Args[1] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if idx != x0.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(2 * c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstoreconstidx2 {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if i != x.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 1
-               v0.AddArg(i)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value) bool {
-       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if r0.AuxInt != 8 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if v_2.AuxInt != 16 {
+               if r1.AuxInt != 8 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x.AuxInt != i-2 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if x.Aux != s {
+               if sh.AuxInt != 16 {
                        break
                }
-               if p != x.Args[0] {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x.Args[1] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if w != x.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-2 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if idx != x1.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if mem != x1.Args[2] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_2.AuxInt != 16 {
+               if sh.AuxInt != 16 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x.Args[1] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if w != x.Args[2] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-2 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULL(v *Value) bool {
-       // match: (MULL x (MOVLconst [c]))
-       // cond:
-       // result: (MULLconst [c] x)
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULL (MOVLconst [c]) x)
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool {
-       // match: (MULLconst [c] (MULLconst [d] x))
-       // cond:
-       // result: (MULLconst [int64(int32(c * d))] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MULLconst {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = int64(int32(c * d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [int64(int32(c*d))])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(int32(c * d))
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool {
-       // match: (MULQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (MULQconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (MULQconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULQconst [c] (MULQconst [d] x))
-       // cond: is32Bit(c*d)
-       // result: (MULQconst [c * d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MULQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c * d)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c * d
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [-1] x)
-       // cond:
-       // result: (NEGQ x)
-       for {
-               if v.AuxInt != -1 {
+               if p != x1.Args[1] {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64NEGQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [0] _)
-       // cond:
-       // result: (MOVQconst [0])
-       for {
-               if v.AuxInt != 0 {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULQconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               x := v.Args[0]
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MULQconst [3] x)
-       // cond:
-       // result: (LEAQ2 x x)
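        // The blocks that follow apply the same pairing one level up: two adjacent
        // byte-swapped 32-bit indexed loads (BSWAPL of MOVLloadidx1 at i0 and i1 == i0+4)
        // are merged into a single MOVQloadidx1 followed by BSWAPQ, again rooted at
        // mergePoint(b, x0, x1).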
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               if v.AuxInt != 3 {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [5] x)
-       // cond:
-       // result: (LEAQ4 x x)
-       for {
-               if v.AuxInt != 5 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [7] x)
-       // cond:
-       // result: (LEAQ8 (NEGQ <v.Type> x) x)
-       for {
-               if v.AuxInt != 7 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, v.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [9] x)
-       // cond:
-       // result: (LEAQ8 x x)
-       for {
-               if v.AuxInt != 9 {
+               if sh.AuxInt != 32 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [11] x)
-       // cond:
-       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
-       for {
-               if v.AuxInt != 11 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [13] x)
-       // cond:
-       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
-       for {
-               if v.AuxInt != 13 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [21] x)
-       // cond:
-       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
-       for {
-               if v.AuxInt != 21 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [25] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
-       for {
-               if v.AuxInt != 25 {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MULQconst [37] x)
-       // cond:
-       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               if v.AuxInt != 37 {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [41] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
-       for {
-               if v.AuxInt != 41 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [73] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
-       for {
-               if v.AuxInt != 73 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SHLQconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               v.reset(OpAMD64SUBQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-2) && c >= 34
-       // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-2) && c >= 34) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 2)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-4) && c >= 68
-       // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-4) && c >= 68) {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 4)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-8) && c >= 136
-       // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-8) && c >= 136) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MULQconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] (MOVQconst [d]))
-       // cond:
-       // result: (MOVQconst [c*d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c * d
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool {
-       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSDmem x [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSDload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSDmem x [off] {sym} ptr mem)
-       for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVSDload {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool {
-       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSSmem x [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSSload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSSmem x [off] {sym} ptr mem)
-       for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVSSload {
+               if p != x0.Args[1] {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool {
-       // match: (NEGL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [int64(int32(-c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(int32(-c))
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool {
-       // match: (NEGQ (MOVQconst [c]))
-       // cond:
-       // result: (MOVQconst [-c])
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -c
-               return true
-       }
-       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
-       // cond: c != -(1<<31)
-       // result: (ADDQconst [-c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64NEGQ {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v_0_0.Args[0]
-               if !(c != -(1 << 31)) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool {
-       // match: (NOTL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool {
-       // match: (NOTQ (MOVQconst [c]))
-       // cond:
-       // result: (MOVQconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORL x (MOVLconst [c]))
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: ( ORL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               if x != v_1.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRWconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               if x != v_1.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRWconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               if p != x1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRBconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               if x != v_1.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRBconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               if idx != x1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORL x:(SHLLconst _) y)
-       // cond: y.Op != OpAMD64SHLLconst
-       // result: (ORL y x)
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64SHLLconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpAMD64SHLLconst) {
+               if sh.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64ORL)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL                  x0:(MOVBload [i]   {s} p mem)     s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               if s0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x1.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWload [i]   {s} p mem)     s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))     s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORL {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               if s0.AuxInt != 16 {
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x1.AuxInt != i+2 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s1.AuxInt != 24 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x2.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x2.Aux != s {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x2.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if mem != x2.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if p != x1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if s0.AuxInt != 8 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := s0.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))     s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORL {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
+               idx := x0.Args[0]
+               p := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if s0.AuxInt != 16 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := s0.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+2 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x2.AuxInt != i+3 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if p != x2.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL                        x0:(MOVBload [i] {s} p mem)     s0:(SHLLconst [8]  x1:(MOVBload [i-1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i-1] {s} p mem))
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if s0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i - 1
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL                        x0:(MOVBloadidx1 [i] {s} p idx mem)     s0:(SHLLconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 <v.Type> [i-1] {s} p idx mem))
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if s0.AuxInt != 8 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := s0.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v1.AuxInt = i - 1
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWload [i1] {s} p mem))     s1:(SHLLconst [16] x2:(MOVBload [i1-1] {s} p mem)))     s2:(SHLLconst [24] x3:(MOVBload [i1-2] {s} p mem)))
-       // cond: x01.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && mergePoint(b,x01,x2,x3) != nil   && clobber(x01)   && clobber(x2)   && clobber(x3)   && clobber(s1)   && clobber(s2)   && clobber(o0)   && clobber(o1)
-       // result: @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLload [i1-2] {s} p mem))
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o1 := v.Args[0]
-               if o1.Op != OpAMD64ORL {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ROLWconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if o0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x01 := o0.Args[0]
-               if x01.Op != OpAMD64MOVWload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               i1 := x01.AuxInt
-               s := x01.Aux
-               p := x01.Args[0]
-               mem := x01.Args[1]
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s1.AuxInt != 16 {
+               if idx != x1.Args[0] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               if p != x1.Args[1] {
                        break
                }
-               if x2.AuxInt != i1-1 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x2.Aux != s {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if p != x2.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
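The ORQ cases above and below are commuted variants of one pair-wise rule: two adjacent MOVBloadidx1 byte loads, shifted into neighboring 8-bit positions of a wider OR chain (i1 == i0+1, j1 == j0-8, j1%16 == 0), are folded into a single MOVWloadidx1 plus a ROLWconst [8] byte swap, still shifted by j1. A minimal source-level sketch of the kind of big-endian byte combining these rules target, assuming a hypothetical helper name:

	package be // illustrative sketch only, not part of this change

	// load16be combines two adjacent byte loads big-endian. The byte-pair
	// rules above fold the corresponding SSA into one 16-bit load whose
	// bytes are then swapped with ROLW $8.
	func load16be(b []byte, i int) uint16 {
		return uint16(b[i])<<8 | uint16(b[i+1])
	}
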
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if mem != x2.Args[1] {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               s2 := v.Args[1]
-               if s2.Op != OpAMD64SHLLconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if s2.AuxInt != 24 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x3.AuxInt != i1-2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x01.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x01, x2, x3) != nil && clobber(x01) && clobber(x2) && clobber(x3) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x01, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i1 - 2
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWloadidx1 [i1] {s} p idx mem))     s1:(SHLLconst [16] x2:(MOVBloadidx1 [i1-1] {s} p idx mem)))     s2:(SHLLconst [24] x3:(MOVBloadidx1 [i1-2] {s} p idx mem)))
-       // cond: x01.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && mergePoint(b,x01,x2,x3) != nil   && clobber(x01)   && clobber(x2)   && clobber(x3)   && clobber(s1)   && clobber(s2)   && clobber(o0)   && clobber(o1)
-       // result: @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLloadidx1 <v.Type> [i1-2] {s} p idx mem))
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o1 := v.Args[0]
-               if o1.Op != OpAMD64ORL {
-                       break
-               }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if o0.AuxInt != 8 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x01 := o0.Args[0]
-               if x01.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x01.AuxInt
-               s := x01.Aux
-               p := x01.Args[0]
-               idx := x01.Args[1]
-               mem := x01.Args[2]
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if s1.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.AuxInt != i1-1 {
+               if p != x0.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x2.Args[1] {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x2.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s2 := v.Args[1]
-               if s2.Op != OpAMD64SHLLconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x3.AuxInt != i1-2 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x3.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x3.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x3.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x3.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x01.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x01, x2, x3) != nil && clobber(x01) && clobber(x2) && clobber(x3) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x01, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
-               v1.AuxInt = i1 - 2
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
-       for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
-       // match: (ORLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORLconst [c] _)
-       // cond: int32(c)==-1
-       // result: (MOVLconst [-1])
-       for {
-               c := v.AuxInt
-               if !(int32(c) == -1) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [c|d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c | d
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if mem != x0.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: ( ORQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x != v_1.Args[0] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               if p != x0.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x:(SHLQconst _) y)
-       // cond: y.Op != OpAMD64SHLQconst
-       // result: (ORQ y x)
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64SHLQconst {
-                       break
-               }
-               y := v.Args[1]
-               if !(y.Op != OpAMD64SHLQconst) {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               v.reset(OpAMD64ORQ)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ                        x0:(MOVBload [i]   {s} p mem)     s0:(SHLQconst [8]  x1:(MOVBload [i+1] {s} p mem)))     s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))     s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))     s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))     s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))     s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))     s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
-       for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORQ {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               o1 := o0.Args[0]
-               if o1.Op != OpAMD64ORQ {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpAMD64ORQ {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               if idx != x0.Args[0] {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if p != x0.Args[1] {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if mem != x0.Args[2] {
                        break
                }
-               if s0.AuxInt != 8 {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x1.AuxInt != i+1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x1.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if p != x1.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if mem != x1.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s1.AuxInt != 16 {
+               if idx != x0.Args[0] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               if p != x0.Args[1] {
                        break
                }
-               if x2.AuxInt != i+2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x2.Aux != s {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if p != x2.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if mem != x2.Args[1] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if s2.AuxInt != 24 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x3.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s3.AuxInt != 32 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x4.AuxInt != i+4 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x4.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x4.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if mem != x4.Args[1] {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if s4.AuxInt != 40 {
+               if r1.AuxInt != 8 {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBload {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x5.AuxInt != i+5 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x5.Aux != s {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x5.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x5.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s5.AuxInt != 48 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
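This family continues the same pair-wise merging one level up: two byte-swapped 16-bit loads (ROLWconst [8] of MOVWloadidx1) at offsets i0 and i0+2, sitting in adjacent 16-bit positions of the OR chain (j1 == j0-16, j1%32 == 0), are folded into a single MOVLloadidx1 whose bytes are reversed with BSWAPL. Applied repeatedly, byte pairs become words and word pairs become longs, so a big-endian load written out byte by byte, as in the hedged sketch below, can end up as one wide load followed by a byte swap (the helper name is hypothetical):

	package be // illustrative sketch only, not part of this change

	// load64be assembles a big-endian 64-bit value from eight adjacent bytes.
	// Pair-wise rules like the ones above merge the byte loads two at a time,
	// then the resulting 16-bit and 32-bit pieces, into a single wide load
	// that is byte-swapped afterwards.
	func load64be(b []byte, i int) uint64 {
		return uint64(b[i])<<56 | uint64(b[i+1])<<48 | uint64(b[i+2])<<40 | uint64(b[i+3])<<32 |
			uint64(b[i+4])<<24 | uint64(b[i+5])<<16 | uint64(b[i+6])<<8 | uint64(b[i+7])
	}
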
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x6.AuxInt != i+6 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x6.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x6.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if mem != x6.Args[1] {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if r1.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBload {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ                        x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))     s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))     s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))     s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))     s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))     s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))     s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORQ {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpAMD64ORQ {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpAMD64ORQ {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               if r0.AuxInt != 8 {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               s1 := o4.Args[1]
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s1.AuxInt != 16 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x2.AuxInt != i+2 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x2.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x3.AuxInt != i+3 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if p != x3.Args[0] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if idx != x3.Args[1] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if mem != x3.Args[2] {
+               if r1.AuxInt != 8 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if s3.AuxInt != 32 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBloadidx1 {
+               if p != x1.Args[0] {
                        break
                }
-               if x4.AuxInt != i+4 {
+               if idx != x1.Args[1] {
                        break
                }
-               if x4.Aux != s {
+               if mem != x1.Args[2] {
                        break
                }
-               if p != x4.Args[0] {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if idx != x4.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if mem != x4.Args[2] {
+               if r0.AuxInt != 8 {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if s4.AuxInt != 40 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBloadidx1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x5.AuxInt != i+5 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x5.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x5.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x5.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x5.Args[2] {
+               if p != x1.Args[0] {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if s5.AuxInt != 48 {
+               if mem != x1.Args[2] {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBloadidx1 {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x6.AuxInt != i+6 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x6.Aux != s {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x6.Args[0] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if idx != x6.Args[1] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x6.Args[2] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x7.AuxInt != i+7 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ                        x0:(MOVBload [i] {s} p mem)     s0:(SHLQconst [8]  x1:(MOVBload [i-1] {s} p mem)))     s1:(SHLQconst [16] x2:(MOVBload [i-2] {s} p mem)))     s2:(SHLQconst [24] x3:(MOVBload [i-3] {s} p mem)))     s3:(SHLQconst [32] x4:(MOVBload [i-4] {s} p mem)))     s4:(SHLQconst [40] x5:(MOVBload [i-5] {s} p mem)))     s5:(SHLQconst [48] x6:(MOVBload [i-6] {s} p mem)))     s6:(SHLQconst [56] x7:(MOVBload [i-7] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQload [i-7] {s} p mem))
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o5 := v.Args[0]
-               if o5.Op != OpAMD64ORQ {
-                       break
-               }
-               o4 := o5.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o3 := o4.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               o2 := o3.Args[0]
-               if o2.Op != OpAMD64ORQ {
+               if r0.AuxInt != 8 {
                        break
                }
-               o1 := o2.Args[0]
-               if o1.Op != OpAMD64ORQ {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ORQ {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if r1.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x1.AuxInt != i-1 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x1.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s1.AuxInt != 16 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2.AuxInt != i-2 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x2.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x2.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x2.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s2 := o2.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.AuxInt != i-3 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s3 := o3.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s3.AuxInt != 32 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBload {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x4.AuxInt != i-4 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x4.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x4.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x4.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s4 := o4.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s4.AuxInt != 40 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x5.AuxInt != i-5 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x5.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x5.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x5.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s5 := o5.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s5.AuxInt != 48 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x6.AuxInt != i-6 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x6.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x6.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x6.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.AuxInt != i-7 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
-               v1.AuxInt = i - 7
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ                        x0:(MOVBloadidx1 [i] {s} p idx mem)     s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))     s1:(SHLQconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))     s2:(SHLQconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))     s3:(SHLQconst [32] x4:(MOVBloadidx1 [i-4] {s} p idx mem)))     s4:(SHLQconst [40] x5:(MOVBloadidx1 [i-5] {s} p idx mem)))     s5:(SHLQconst [48] x6:(MOVBloadidx1 [i-6] {s} p idx mem)))     s6:(SHLQconst [56] x7:(MOVBloadidx1 [i-7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQloadidx1 <v.Type> [i-7] {s} p idx mem))
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o5 := v.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               o4 := o5.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               o3 := o4.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               o2 := o3.Args[0]
-               if o2.Op != OpAMD64ORQ {
+               if r1.AuxInt != 8 {
                        break
                }
-               o1 := o2.Args[0]
-               if o1.Op != OpAMD64ORQ {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ORQ {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               if s0.AuxInt != 8 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if p != x0.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x1.Args[1] {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x1.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := o1.Args[1]
+               s1 := or.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s1.AuxInt != 16 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x2.AuxInt != i-2 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x2.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x2.Args[0] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x2.Args[1] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if mem != x2.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               s2 := o2.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s2.AuxInt != 24 {
+               if idx != x0.Args[0] {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               if p != x0.Args[1] {
                        break
                }
-               if x3.AuxInt != i-3 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x3.Aux != s {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if p != x3.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if idx != x3.Args[1] {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if mem != x3.Args[2] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               s3 := o3.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               if s3.AuxInt != 32 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x4.AuxInt != i-4 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x4.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x4.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x4.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x4.Args[2] {
+               if idx != x0.Args[0] {
                        break
                }
-               s4 := o4.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               if p != x0.Args[1] {
                        break
                }
-               if s4.AuxInt != 40 {
+               if mem != x0.Args[2] {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBloadidx1 {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x5.AuxInt != i-5 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x5.Aux != s {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x5.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x5.Args[1] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if mem != x5.Args[2] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               s5 := o5.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if s5.AuxInt != 48 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBloadidx1 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x6.AuxInt != i-6 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x6.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x6.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x6.Args[1] {
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if mem != x6.Args[2] {
+               if r1.AuxInt != 8 {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if s6.AuxInt != 56 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBloadidx1 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x7.AuxInt != i-7 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, v.Type)
-               v1.AuxInt = i - 7
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
@@ -14900,6 +82804,37 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
        // cond: !config.nacl
        // result: (SETAE (BTQ x y))
@@ -14931,6 +82866,37 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (TESTLconst [c] x))
        // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
        // result: (SETAE (BTLconst [log2(c)] x))
@@ -14995,6 +82961,30 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTQ x (MOVQconst [c])))
+       // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (InvertFlags x))
        // cond:
        // result: (SETEQ x)
@@ -15410,6 +83400,37 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB  (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
        // cond: !config.nacl
        // result: (SETB  (BTQ x y))
@@ -15441,6 +83462,37 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB  (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (TESTLconst [c] x))
        // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
        // result: (SETB  (BTLconst [log2(c)] x))
@@ -15505,6 +83557,30 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTQ x (MOVQconst [c])))
+       // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+       // result: (SETB  (BTQconst [log2(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (InvertFlags x))
        // cond:
        // result: (SETNE x)
@@ -16388,23 +84464,6 @@ func rewriteValueAMD64_OpAMD64TESTB(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool {
-       // match: (TESTL y x:(SHLL _ _))
-       // cond: y.Op != OpAMD64SHLL
-       // result: (TESTL x y)
-       for {
-               y := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64SHLL {
-                       break
-               }
-               if !(y.Op != OpAMD64SHLL) {
-                       break
-               }
-               v.reset(OpAMD64TESTL)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
        // match: (TESTL (MOVLconst [c]) x)
        // cond:
        // result: (TESTLconst [c] x)
@@ -16438,23 +84497,6 @@ func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64TESTQ(v *Value) bool {
-       // match: (TESTQ y x:(SHLQ _ _))
-       // cond: y.Op != OpAMD64SHLQ
-       // result: (TESTQ x y)
-       for {
-               y := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64SHLQ {
-                       break
-               }
-               if !(y.Op != OpAMD64SHLQ) {
-                       break
-               }
-               v.reset(OpAMD64TESTQ)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
        // match: (TESTQ (MOVQconst [c]) x)
        // cond: is32Bit(c)
        // result: (TESTQconst [c] x)
@@ -16721,9 +84763,9 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
@@ -16735,10 +84777,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
@@ -16746,34 +84789,35 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
+       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -16786,13 +84830,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
@@ -16800,38 +84842,36 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -16844,13 +84884,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
@@ -16858,32 +84896,30 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17033,9 +85069,9 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
+       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLQconst {
@@ -17047,10 +85083,11 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
@@ -17058,28 +85095,29 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
+       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17192,7 +85230,7 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17205,7 +85243,7 @@ func rewriteValueAMD64_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17231,7 +85269,7 @@ func rewriteValueAMD64_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADDQ  x y)
        for {
@@ -17257,7 +85295,7 @@ func rewriteValueAMD64_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17379,7 +85417,7 @@ func rewriteValueAMD64_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDL x y)
        for {
@@ -17824,7 +85862,7 @@ func rewriteValueAMD64_OpCom64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTL x)
        for {
@@ -17835,7 +85873,7 @@ func rewriteValueAMD64_OpCom8(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -17846,7 +85884,7 @@ func rewriteValueAMD64_OpConst16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -17868,7 +85906,7 @@ func rewriteValueAMD64_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVQconst [val])
        for {
@@ -17890,7 +85928,7 @@ func rewriteValueAMD64_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -18159,7 +86197,7 @@ func rewriteValueAMD64_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (Select0 (DIVW  x y))
        for {
@@ -18197,7 +86235,7 @@ func rewriteValueAMD64_OpDiv32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (Select0 (DIVL  x y))
        for {
@@ -18248,7 +86286,7 @@ func rewriteValueAMD64_OpDiv64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (Select0 (DIVQ  x y))
        for {
@@ -18299,7 +86337,7 @@ func rewriteValueAMD64_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (Select0 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
        for {
@@ -18322,7 +86360,7 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
@@ -18343,7 +86381,7 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool {
 func rewriteValueAMD64_OpEq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (SETEQ (CMPW x y))
        for {
@@ -18360,7 +86398,7 @@ func rewriteValueAMD64_OpEq16(v *Value) bool {
 func rewriteValueAMD64_OpEq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (SETEQ (CMPL x y))
        for {
@@ -18394,7 +86432,7 @@ func rewriteValueAMD64_OpEq32F(v *Value) bool {
 func rewriteValueAMD64_OpEq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq64  x y)
+       // match: (Eq64 x y)
        // cond:
        // result: (SETEQ (CMPQ x y))
        for {
@@ -18428,7 +86466,7 @@ func rewriteValueAMD64_OpEq64F(v *Value) bool {
 func rewriteValueAMD64_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -18445,7 +86483,7 @@ func rewriteValueAMD64_OpEq8(v *Value) bool {
 func rewriteValueAMD64_OpEqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -18501,7 +86539,7 @@ func rewriteValueAMD64_OpEqPtr(v *Value) bool {
 func rewriteValueAMD64_OpGeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (SETGE (CMPW x y))
        for {
@@ -18535,7 +86573,7 @@ func rewriteValueAMD64_OpGeq16U(v *Value) bool {
 func rewriteValueAMD64_OpGeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (SETGE (CMPL x y))
        for {
@@ -18586,7 +86624,7 @@ func rewriteValueAMD64_OpGeq32U(v *Value) bool {
 func rewriteValueAMD64_OpGeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq64  x y)
+       // match: (Geq64 x y)
        // cond:
        // result: (SETGE (CMPQ x y))
        for {
@@ -18637,7 +86675,7 @@ func rewriteValueAMD64_OpGeq64U(v *Value) bool {
 func rewriteValueAMD64_OpGeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (SETGE (CMPB x y))
        for {
@@ -18654,7 +86692,7 @@ func rewriteValueAMD64_OpGeq8(v *Value) bool {
 func rewriteValueAMD64_OpGeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (SETAE (CMPB x y))
        for {
@@ -18691,7 +86729,7 @@ func rewriteValueAMD64_OpGetG(v *Value) bool {
 func rewriteValueAMD64_OpGreater16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (SETG (CMPW x y))
        for {
@@ -18725,7 +86763,7 @@ func rewriteValueAMD64_OpGreater16U(v *Value) bool {
 func rewriteValueAMD64_OpGreater32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (SETG (CMPL x y))
        for {
@@ -18776,7 +86814,7 @@ func rewriteValueAMD64_OpGreater32U(v *Value) bool {
 func rewriteValueAMD64_OpGreater64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater64  x y)
+       // match: (Greater64 x y)
        // cond:
        // result: (SETG (CMPQ x y))
        for {
@@ -18827,7 +86865,7 @@ func rewriteValueAMD64_OpGreater64U(v *Value) bool {
 func rewriteValueAMD64_OpGreater8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (SETG (CMPB x y))
        for {
@@ -18844,7 +86882,7 @@ func rewriteValueAMD64_OpGreater8(v *Value) bool {
 func rewriteValueAMD64_OpGreater8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (SETA (CMPB x y))
        for {
@@ -18859,7 +86897,7 @@ func rewriteValueAMD64_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (HMULL  x y)
        for {
@@ -18885,7 +86923,7 @@ func rewriteValueAMD64_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (HMULQ  x y)
        for {
@@ -19011,7 +87049,7 @@ func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
 func rewriteValueAMD64_OpLeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (SETLE (CMPW x y))
        for {
@@ -19045,7 +87083,7 @@ func rewriteValueAMD64_OpLeq16U(v *Value) bool {
 func rewriteValueAMD64_OpLeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (SETLE (CMPL x y))
        for {
@@ -19096,7 +87134,7 @@ func rewriteValueAMD64_OpLeq32U(v *Value) bool {
 func rewriteValueAMD64_OpLeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq64  x y)
+       // match: (Leq64 x y)
        // cond:
        // result: (SETLE (CMPQ x y))
        for {
@@ -19147,7 +87185,7 @@ func rewriteValueAMD64_OpLeq64U(v *Value) bool {
 func rewriteValueAMD64_OpLeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (SETLE (CMPB x y))
        for {
@@ -19164,7 +87202,7 @@ func rewriteValueAMD64_OpLeq8(v *Value) bool {
 func rewriteValueAMD64_OpLeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (SETBE (CMPB x y))
        for {
@@ -19181,7 +87219,7 @@ func rewriteValueAMD64_OpLeq8U(v *Value) bool {
 func rewriteValueAMD64_OpLess16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (SETL (CMPW x y))
        for {
@@ -19215,7 +87253,7 @@ func rewriteValueAMD64_OpLess16U(v *Value) bool {
 func rewriteValueAMD64_OpLess32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (SETL (CMPL x y))
        for {
@@ -19266,7 +87304,7 @@ func rewriteValueAMD64_OpLess32U(v *Value) bool {
 func rewriteValueAMD64_OpLess64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less64  x y)
+       // match: (Less64 x y)
        // cond:
        // result: (SETL (CMPQ x y))
        for {
@@ -19317,7 +87355,7 @@ func rewriteValueAMD64_OpLess64U(v *Value) bool {
 func rewriteValueAMD64_OpLess8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (SETL (CMPB x y))
        for {
@@ -19334,7 +87372,7 @@ func rewriteValueAMD64_OpLess8(v *Value) bool {
 func rewriteValueAMD64_OpLess8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (SETB (CMPB x y))
        for {
@@ -19520,7 +87558,7 @@ func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19616,7 +87654,7 @@ func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19712,7 +87750,7 @@ func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
@@ -19808,7 +87846,7 @@ func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19834,7 +87872,7 @@ func rewriteValueAMD64_OpMod16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (Select1 (DIVW  x y))
        for {
@@ -19872,7 +87910,7 @@ func rewriteValueAMD64_OpMod32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (Select1 (DIVL  x y))
        for {
@@ -19910,7 +87948,7 @@ func rewriteValueAMD64_OpMod64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod64  x y)
+       // match: (Mod64 x y)
        // cond:
        // result: (Select1 (DIVQ  x y))
        for {
@@ -19948,7 +87986,7 @@ func rewriteValueAMD64_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (Select1 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
        for {
@@ -19971,7 +88009,7 @@ func rewriteValueAMD64_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
@@ -20348,7 +88386,7 @@ func rewriteValueAMD64_OpMove(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20361,7 +88399,7 @@ func rewriteValueAMD64_OpMul16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20387,7 +88425,7 @@ func rewriteValueAMD64_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULQ  x y)
        for {
@@ -20426,7 +88464,7 @@ func rewriteValueAMD64_OpMul64uhilo(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20439,7 +88477,7 @@ func rewriteValueAMD64_OpMul8(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20450,7 +88488,7 @@ func rewriteValueAMD64_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20479,7 +88517,7 @@ func rewriteValueAMD64_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEGQ x)
        for {
@@ -20508,7 +88546,7 @@ func rewriteValueAMD64_OpNeg64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20521,7 +88559,7 @@ func rewriteValueAMD64_OpNeg8(v *Value) bool {
 func rewriteValueAMD64_OpNeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (SETNE (CMPW x y))
        for {
@@ -20538,7 +88576,7 @@ func rewriteValueAMD64_OpNeq16(v *Value) bool {
 func rewriteValueAMD64_OpNeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (SETNE (CMPL x y))
        for {
@@ -20572,7 +88610,7 @@ func rewriteValueAMD64_OpNeq32F(v *Value) bool {
 func rewriteValueAMD64_OpNeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq64  x y)
+       // match: (Neq64 x y)
        // cond:
        // result: (SETNE (CMPQ x y))
        for {
@@ -20606,7 +88644,7 @@ func rewriteValueAMD64_OpNeq64F(v *Value) bool {
 func rewriteValueAMD64_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -20623,7 +88661,7 @@ func rewriteValueAMD64_OpNeq8(v *Value) bool {
 func rewriteValueAMD64_OpNeqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -20794,7 +88832,7 @@ func rewriteValueAMD64_OpOr64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORL x y)
        for {
@@ -20918,7 +88956,7 @@ func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
@@ -21023,7 +89061,7 @@ func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
@@ -21122,7 +89160,7 @@ func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -21227,7 +89265,7 @@ func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
@@ -21326,7 +89364,7 @@ func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
@@ -21431,7 +89469,7 @@ func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
        for {
@@ -21530,7 +89568,7 @@ func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
@@ -21635,7 +89673,7 @@ func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
@@ -21701,7 +89739,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpSelect1(v *Value) bool {
-       // match: (Select1     (AddTupleFirst32 tuple _  ))
+       // match: (Select1 (AddTupleFirst32 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -21714,7 +89752,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
                v.AddArg(tuple)
                return true
        }
-       // match: (Select1     (AddTupleFirst64 tuple _  ))
+       // match: (Select1 (AddTupleFirst64 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -21763,7 +89801,7 @@ func rewriteValueAMD64_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21774,7 +89812,7 @@ func rewriteValueAMD64_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21785,7 +89823,7 @@ func rewriteValueAMD64_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21944,7 +89982,7 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -21957,7 +89995,7 @@ func rewriteValueAMD64_OpSub16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -21983,7 +90021,7 @@ func rewriteValueAMD64_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUBQ  x y)
        for {
@@ -22009,7 +90047,7 @@ func rewriteValueAMD64_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -22057,7 +90095,7 @@ func rewriteValueAMD64_OpSubPtr(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -22081,7 +90119,7 @@ func rewriteValueAMD64_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -22117,7 +90155,7 @@ func rewriteValueAMD64_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: x
        for {
@@ -22168,7 +90206,7 @@ func rewriteValueAMD64_OpXor64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORL x y)
        for {
@@ -22529,7 +90567,7 @@ func rewriteValueAMD64_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22540,7 +90578,7 @@ func rewriteValueAMD64_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22551,7 +90589,7 @@ func rewriteValueAMD64_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22601,6 +90639,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTL y (SHLL (MOVLconst [1]) x)))
+               // cond: !config.nacl
+               // result: (UGE (BTL x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTL {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLL {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVLconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
                // cond: !config.nacl
                // result: (UGE (BTQ x y))
@@ -22632,6 +90701,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+               // cond: !config.nacl
+               // result: (UGE (BTQ x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLQ {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (TESTLconst [c] x))
                // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
                // result: (UGE (BTLconst [log2(c)] x))
@@ -22696,6 +90796,30 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTQ x (MOVQconst [c])))
+               // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+               // result: (UGE (BTQconst [log2(c)] x))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       x := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       c := v_1.AuxInt
+                       if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+                       v0.AuxInt = log2(c)
+                       v0.AddArg(x)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (InvertFlags cmp) yes no)
                // cond:
                // result: (EQ cmp yes no)
@@ -22999,7 +91123,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        return true
                }
        case BlockIf:
-               // match: (If (SETL  cmp) yes no)
+               // match: (If (SETL cmp) yes no)
                // cond:
                // result: (LT  cmp yes no)
                for {
@@ -23033,7 +91157,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETG  cmp) yes no)
+               // match: (If (SETG cmp) yes no)
                // cond:
                // result: (GT  cmp yes no)
                for {
@@ -23101,7 +91225,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETB  cmp) yes no)
+               // match: (If (SETB cmp) yes no)
                // cond:
                // result: (ULT cmp yes no)
                for {
@@ -23135,7 +91259,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETA  cmp) yes no)
+               // match: (If (SETA cmp) yes no)
                // cond:
                // result: (UGT cmp yes no)
                for {
@@ -23169,7 +91293,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETGF  cmp) yes no)
+               // match: (If (SETGF cmp) yes no)
                // cond:
                // result: (UGT  cmp yes no)
                for {
@@ -23457,7 +91581,35 @@ func rewriteBlockAMD64(b *Block) bool {
                        return true
                }
        case BlockAMD64NE:
-               // match: (NE (TESTB (SETL  cmp) (SETL  cmp)) yes no)
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
+               // cond:
+               // result: (LT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64LT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
                // result: (LT  cmp yes no)
                for {
@@ -23513,7 +91665,63 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETG  cmp) (SETG  cmp)) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
+               // cond:
+               // result: (LE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64LE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
+               // cond:
+               // result: (GT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64GT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
                // result: (GT  cmp yes no)
                for {
@@ -23569,6 +91777,62 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
+               // cond:
+               // result: (GE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64GE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
+               // cond:
+               // result: (EQ  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQ
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
                // result: (EQ  cmp yes no)
@@ -23625,7 +91889,63 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETB  cmp) (SETB  cmp)) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
+               // cond:
+               // result: (NE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETNE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETNE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64NE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
+               // cond:
+               // result: (ULT cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETB {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETB {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64ULT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
                // result: (ULT cmp yes no)
                for {
@@ -23681,7 +92001,63 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
+               // cond:
+               // result: (ULE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETBE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETBE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64ULE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
+               // cond:
+               // result: (UGT cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETA {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETA {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
                // result: (UGT cmp yes no)
                for {
@@ -23737,6 +92113,34 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
+               // cond:
+               // result: (UGE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETAE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETAE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (TESTL (SHLL (MOVLconst [1]) x) y))
                // cond: !config.nacl
                // result: (ULT (BTL x y))
@@ -23768,6 +92172,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (NE (TESTL y (SHLL (MOVLconst [1]) x)))
+               // cond: !config.nacl
+               // result: (ULT (BTL x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTL {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLL {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVLconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (NE (TESTQ (SHLQ (MOVQconst [1]) x) y))
                // cond: !config.nacl
                // result: (ULT (BTQ x y))
@@ -23799,6 +92234,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (NE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+               // cond: !config.nacl
+               // result: (ULT (BTQ x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLQ {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (NE (TESTLconst [c] x))
                // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
                // result: (ULT (BTLconst [log2(c)] x))
@@ -23863,7 +92329,31 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
-               // match: (NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no)
+               // match: (NE (TESTQ x (MOVQconst [c])))
+               // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+               // result: (ULT (BTQconst [log2(c)] x))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       x := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       c := v_1.AuxInt
+                       if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+                       v0.AuxInt = log2(c)
+                       v0.AddArg(x)
+                       b.SetControl(v0)
+                       return true
+               }
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
                // result: (UGT  cmp yes no)
                for {
@@ -23891,6 +92381,62 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
                // cond:
                // result: (UGE  cmp yes no)
@@ -23947,6 +92493,62 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETEQF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETEQF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETNEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETNEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no)
                // cond:
                // result: (NEF  cmp yes no)
src/cmd/compile/internal/ssa/rewriteARM.go
index 0b554d79a411fe442787964489f85bcbb4681a17..25b698838317706f2c8d18db5afe5f65498d61c8 100644 (file)
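The rewriteValueARM_* functions below are regenerated output. Each pair of near-identical match loops that differs only in which argument slot (v.Args[0] vs v.Args[1]) carries the constant or shift operand is expanded from a single source rule in src/cmd/compile/internal/ssa/gen/ARM.rules. As an illustrative sketch (the operand order actually written in ARM.rules may be either one), a rule of the form

  (ADC (MOVWconst [c]) x flags) -> (ADCconst [c] x flags)

accounts for both the (ADC x (MOVWconst [c]) flags) and (ADC (MOVWconst [c]) x flags) loops visible in rewriteValueARM_OpARMADC.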
@@ -758,6 +758,40 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC x (MOVWconst [c]) flags)
+       // cond:
+       // result: (ADCconst [c] x flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               flags := v.Args[2]
+               v.reset(OpARMADCconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC (MOVWconst [c]) x flags)
+       // cond:
+       // result: (ADCconst [c] x flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SLLconst [c] y) flags)
        // cond:
        // result: (ADCshiftLL x y [c] flags)
@@ -796,6 +830,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SLLconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftLL x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SLLconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftLL x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRLconst [c] y) flags)
        // cond:
        // result: (ADCshiftRL x y [c] flags)
@@ -834,6 +906,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRLconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftRL x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRLconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftRL x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRAconst [c] y) flags)
        // cond:
        // result: (ADCshiftRA x y [c] flags)
@@ -872,6 +982,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRAconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftRA x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRAconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftRA x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SLL y z) flags)
        // cond:
        // result: (ADCshiftLLreg x y z flags)
@@ -910,6 +1058,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SLL y z) x flags)
+       // cond:
+       // result: (ADCshiftLLreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSLL {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SLL y z) flags)
+       // cond:
+       // result: (ADCshiftLLreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSLL {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRL y z) flags)
        // cond:
        // result: (ADCshiftRLreg x y z flags)
@@ -948,6 +1134,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRL y z) x flags)
+       // cond:
+       // result: (ADCshiftRLreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRL {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRL y z) flags)
+       // cond:
+       // result: (ADCshiftRLreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRL {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRA y z) flags)
        // cond:
        // result: (ADCshiftRAreg x y z flags)
@@ -986,6 +1210,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRA y z) x flags)
+       // cond:
+       // result: (ADCshiftRAreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRA {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRAreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRA y z) flags)
+       // cond:
+       // result: (ADCshiftRAreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRA {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRAreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        return false
 }
 func rewriteValueARM_OpARMADCconst(v *Value) bool {
@@ -1297,31 +1559,31 @@ func rewriteValueARM_OpARMADCshiftRLreg(v *Value) bool {
 func rewriteValueARM_OpARMADD(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ADD (MOVWconst [c]) x)
+       // match: (ADD x (MOVWconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVWconst [c]))
+       // match: (ADD (MOVWconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -1592,6 +1854,31 @@ func rewriteValueARM_OpARMADD(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (ADD <t> (RSBconst [d] y) (RSBconst [c] x))
+       // cond:
+       // result: (RSBconst [c+d] (ADD <t> x y))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMRSBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMRSBconst {
+                       break
+               }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
+               v.reset(OpARMRSBconst)
+               v.AuxInt = c + d
+               v0 := b.NewValue0(v.Pos, OpARMADD, t)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (ADD (MUL x y) a)
        // cond:
        // result: (MULA x y a)
@@ -1629,31 +1916,31 @@ func rewriteValueARM_OpARMADD(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMADDS(v *Value) bool {
-       // match: (ADDS (MOVWconst [c]) x)
+       // match: (ADDS x (MOVWconst [c]))
        // cond:
        // result: (ADDSconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMADDSconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDS x (MOVWconst [c]))
+       // match: (ADDS (MOVWconst [c]) x)
        // cond:
        // result: (ADDSconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMADDSconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2490,31 +2777,31 @@ func rewriteValueARM_OpARMADDshiftRLreg(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMAND(v *Value) bool {
-       // match: (AND (MOVWconst [c]) x)
+       // match: (AND x (MOVWconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVWconst [c]))
+       // match: (AND (MOVWconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -7122,185 +7409,9 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARMRSBconst)
-               v.AuxInt = 0
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL _ (MOVWconst [0]))
-       // cond:
-       // result: (MOVWconst [0])
-       for {
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               if v_1.AuxInt != 0 {
-                       break
-               }
-               v.reset(OpARMMOVWconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MUL x (MOVWconst [1]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
-                       break
-               }
-               v.reset(OpARMADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (RSBshiftLL x x [log2(c+1)])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARMRSBshiftLL)
-               v.AuxInt = log2(c + 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARMRSBconst)
+               v.AuxInt = 0
+               v.AddArg(x)
                return true
        }
        // match: (MUL (MOVWconst [c]) x)
@@ -7321,6 +7432,21 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL _ (MOVWconst [0]))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARMMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (MUL (MOVWconst [0]) _)
        // cond:
        // result: (MOVWconst [0])
@@ -7336,6 +7462,23 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (MUL x (MOVWconst [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [1]) x)
        // cond:
        // result: x
@@ -7353,6 +7496,24 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
@@ -7371,6 +7532,25 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARMADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c-1) && int32(c) >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
@@ -7390,6 +7570,25 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (RSBshiftLL x x [log2(c+1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARMRSBshiftLL)
+               v.AuxInt = log2(c + 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c+1) && int32(c) >= 7
        // result: (RSBshiftLL x x [log2(c+1)])
@@ -7409,6 +7608,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
@@ -7431,6 +7652,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
@@ -7453,6 +7696,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
        // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
@@ -7475,6 +7740,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
        // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
@@ -7515,6 +7802,24 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AuxInt = int64(int32(c * d))
                return true
        }
+       // match: (MUL (MOVWconst [d]) (MOVWconst [c]))
+       // cond:
+       // result: (MOVWconst [int64(int32(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARMMOVWconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
        return false
 }
 func rewriteValueARM_OpARMMULA(v *Value) bool {
@@ -8282,31 +8587,31 @@ func rewriteValueARM_OpARMNotEqual(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMOR(v *Value) bool {
-       // match: (OR (MOVWconst [c]) x)
+       // match: (OR x (MOVWconst [c]))
        // cond:
        // result: (ORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR x (MOVWconst [c]))
+       // match: (OR (MOVWconst [c]) x)
        // cond:
        // result: (ORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -8627,7 +8932,7 @@ func rewriteValueARM_OpARMORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftLL [c] (SRLconst x [32-c]) x)
+       // match: (ORshiftLL [c] (SRLconst x [32-c]) x)
        // cond:
        // result: (SRRconst [32-c] x)
        for {
@@ -8854,7 +9159,7 @@ func rewriteValueARM_OpARMORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftRL [c] (SLLconst x [32-c]) x)
+       // match: (ORshiftRL [c] (SLLconst x [32-c]) x)
        // cond:
        // result: (SRRconst [   c] x)
        for {
@@ -11110,21 +11415,6 @@ func rewriteValueARM_OpARMSUB(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMSUBS(v *Value) bool {
-       // match: (SUBS (MOVWconst [c]) x)
-       // cond:
-       // result: (RSBSconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARMRSBSconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
        // match: (SUBS x (MOVWconst [c]))
        // cond:
        // result: (SUBSconst [c] x)
@@ -11974,31 +12264,31 @@ func rewriteValueARM_OpARMSUBshiftRLreg(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMXOR(v *Value) bool {
-       // match: (XOR (MOVWconst [c]) x)
+       // match: (XOR x (MOVWconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMXORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVWconst [c]))
+       // match: (XOR (MOVWconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMXORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -14486,7 +14776,7 @@ func rewriteValueARM_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  x y)
+       // match: (Lsh16x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -14592,7 +14882,7 @@ func rewriteValueARM_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  x y)
+       // match: (Lsh32x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -14698,7 +14988,7 @@ func rewriteValueARM_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  x y)
+       // match: (Lsh8x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -15639,7 +15929,7 @@ func rewriteValueARM_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  x y)
+       // match: (Rsh16Ux8 x y)
        // cond:
        // result: (SRL (ZeroExt16to32 x) (ZeroExt8to32 y))
        for {
@@ -15759,7 +16049,7 @@ func rewriteValueARM_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  x y)
+       // match: (Rsh16x8 x y)
        // cond:
        // result: (SRA (SignExt16to32 x) (ZeroExt8to32 y))
        for {
@@ -15867,7 +16157,7 @@ func rewriteValueARM_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  x y)
+       // match: (Rsh32Ux8 x y)
        // cond:
        // result: (SRL x (ZeroExt8to32 y))
        for {
@@ -15969,7 +16259,7 @@ func rewriteValueARM_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  x y)
+       // match: (Rsh32x8 x y)
        // cond:
        // result: (SRA x (ZeroExt8to32 y))
        for {
@@ -16088,7 +16378,7 @@ func rewriteValueARM_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  x y)
+       // match: (Rsh8Ux8 x y)
        // cond:
        // result: (SRL (ZeroExt8to32 x) (ZeroExt8to32 y))
        for {
@@ -16208,7 +16498,7 @@ func rewriteValueARM_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  x y)
+       // match: (Rsh8x8 x y)
        // cond:
        // result: (SRA (SignExt8to32 x) (ZeroExt8to32 y))
        for {
src/cmd/compile/internal/ssa/rewriteARM64.go
index 009e36b90fb283bcd3bfdc72ea76dfa8b8ded1df..57c5b04245694939c874beb87812a13090746d2e 100644 (file)
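The same expansion covers non-constant operands. As a sketch (the operand order actually written in src/cmd/compile/internal/ssa/gen/ARM64.rules may differ), a single rule such as

  (AND x (MVN y)) -> (BIC x y)

is enough to produce both the existing (AND x (MVN y)) matcher and the newly added (AND (MVN y) x) matcher in rewriteValueARM64_OpARM64AND below.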
@@ -714,31 +714,31 @@ func rewriteValueARM64(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ADD(v *Value) bool {
-       // match: (ADD (MOVDconst [c]) x)
+       // match: (ADD x (MOVDconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64ADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVDconst [c]))
+       // match: (ADD (MOVDconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64ADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -897,7 +897,7 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDconst [0]  x)
+       // match: (ADDconst [0] x)
        // cond:
        // result: x
        for {
@@ -1179,31 +1179,31 @@ func rewriteValueARM64_OpARM64ADDshiftRL(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64AND(v *Value) bool {
-       // match: (AND (MOVDconst [c]) x)
+       // match: (AND x (MOVDconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64ANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVDconst [c]))
+       // match: (AND (MOVDconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64ANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -1237,6 +1237,21 @@ func rewriteValueARM64_OpARM64AND(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (AND (MVN y) x)
+       // cond:
+       // result: (BIC x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MVN {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64BIC)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (AND x (SLLconst [c] y))
        // cond:
        // result: (ANDshiftLL x y [c])
@@ -1342,7 +1357,7 @@ func rewriteValueARM64_OpARM64AND(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVDconst [0])
        for {
@@ -1666,7 +1681,7 @@ func rewriteValueARM64_OpARM64BIC(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64BICconst(v *Value) bool {
-       // match: (BICconst [0]  x)
+       // match: (BICconst [0] x)
        // cond:
        // result: x
        for {
@@ -2122,7 +2137,7 @@ func rewriteValueARM64_OpARM64CMPWconst(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: x==y
        // result: (FlagEQ)
        for {
@@ -2138,7 +2153,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagEQ)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)<int64(y) && uint64(x)<uint64(y)
        // result: (FlagLT_ULT)
        for {
@@ -2154,7 +2169,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)<int64(y) && uint64(x)>uint64(y)
        // result: (FlagLT_UGT)
        for {
@@ -2170,7 +2185,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagLT_UGT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)>int64(y) && uint64(x)<uint64(y)
        // result: (FlagGT_ULT)
        for {
@@ -2186,7 +2201,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagGT_ULT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)>int64(y) && uint64(x)>uint64(y)
        // result: (FlagGT_UGT)
        for {
@@ -2566,7 +2581,7 @@ func rewriteValueARM64_OpARM64CSELULT0(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64DIV(v *Value) bool {
-       // match: (DIV   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (DIV (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(c)/int64(d)])
        for {
@@ -2587,7 +2602,7 @@ func rewriteValueARM64_OpARM64DIV(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64DIVW(v *Value) bool {
-       // match: (DIVW  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (DIVW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(int32(c)/int32(d))])
        for {
@@ -3500,7 +3515,7 @@ func rewriteValueARM64_OpARM64LessThanU(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64MOD(v *Value) bool {
-       // match: (MOD   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(c)%int64(d)])
        for {
@@ -3521,7 +3536,7 @@ func rewriteValueARM64_OpARM64MOD(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64MODW(v *Value) bool {
-       // match: (MODW  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(int32(c)%int32(d))])
        for {
@@ -3748,7 +3763,7 @@ func rewriteValueARM64_OpARM64MOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVDconst [c]))
+       // match: (MOVBreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int8(c))])
        for {
@@ -4099,7 +4114,7 @@ func rewriteValueARM64_OpARM64MOVDreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVDreg  (MOVDconst [c]))
+       // match: (MOVDreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [c])
        for {
@@ -4519,7 +4534,7 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVDconst [c]))
+       // match: (MOVHreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int16(c))])
        for {
@@ -5095,7 +5110,7 @@ func rewriteValueARM64_OpARM64MOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVDconst [c]))
+       // match: (MOVWreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int32(c))])
        for {
@@ -5297,6 +5312,22 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL _ (MOVDconst [0]))
        // cond:
        // result: (MOVDconst [0])
@@ -5312,6 +5343,21 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (MUL (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (MUL x (MOVDconst [1]))
        // cond:
        // result: x
@@ -5329,6 +5375,23 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
@@ -5347,6 +5410,24 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c-1) && c >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
@@ -5366,6 +5447,25 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c+1) && c >= 7
        // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
@@ -5387,6 +5487,27 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%3 == 0 && isPowerOfTwo(c/3)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
@@ -5409,6 +5530,28 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%5 == 0 && isPowerOfTwo(c/5)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
@@ -5431,6 +5574,28 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%7 == 0 && isPowerOfTwo(c/7)
        // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
@@ -5455,6 +5620,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%9 == 0 && isPowerOfTwo(c/9)
        // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
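Taken together, the (MUL x (MOVDconst [c])) rules above and their commuted twins strength-reduce multiplication by small constants into shifts and shifted adds. Reading the generated code, ADDshiftLL a b [k] appears to compute a + (b<<k) and SLLconst [k] a computes a<<k; under that assumption, a quick hedged sanity check of the identities the rules encode, using sample constants:

	package main

	import "fmt"

	func main() {
		x := int64(37) // arbitrary sample operand
		fmt.Println(x*8 == x<<3)         // isPowerOfTwo(c):          x*c == x << log2(c)
		fmt.Println(x*5 == x+x<<2)       // isPowerOfTwo(c-1):        x*c == x + x<<log2(c-1)
		fmt.Println(x*7 == -x+x<<3)      // isPowerOfTwo(c+1):        x*c == -x + x<<log2(c+1)
		fmt.Println(x*24 == (x+x<<1)<<3) // c%3==0, c/3 a power of 2: x*c == (x + x<<1) << log2(c/3)
		fmt.Println(x*72 == (x+x<<3)<<3) // c%9==0, c/9 a power of 2: x*c == (x + x<<3) << log2(c/9)
	}

Every line prints true; presumably the point of these rules is to trade a general multiply for one or two cheap shift/add instructions.
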
@@ -5477,57 +5666,156 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [-1]) x)
-       // cond:
-       // result: (NEG x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [0]) _)
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDconst [c*d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
+               d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.AuxInt = c * d
                return true
        }
-       // match: (MUL (MOVDconst [1]) x)
+       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
        // cond:
-       // result: x
+       // result: (MOVDconst [c*d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MULW(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               c := v_0.AuxInt
                x := v.Args[1]
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
                v.reset(OpCopy)
                v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: x
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -5535,6 +5823,24 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
                if !(isPowerOfTwo(c)) {
                        break
                }
@@ -5543,7 +5849,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
+       // match: (MULW (MOVDconst [c]) x)
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
        for {
@@ -5561,8 +5867,27 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
        for {
                v_0 := v.Args[0]
@@ -5571,7 +5896,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
                v.reset(OpARM64ADDshiftLL)
@@ -5580,8 +5905,29 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
        // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
                v_0 := v.Args[0]
@@ -5590,7 +5936,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
                v.reset(OpARM64ADDshiftLL)
@@ -5601,8 +5947,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
                v_0 := v.Args[0]
@@ -5611,7 +5979,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5623,8 +5991,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
                v_0 := v.Args[0]
@@ -5633,7 +6023,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5645,17 +6035,17 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
        // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5669,9 +6059,9 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -5679,21 +6069,67 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
+               v.AuxInt = log2(c / 7)
                v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
                v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [c*d])
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
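The MULW rules track the MUL rules above, but the arithmetic is 32-bit: the conditions test int32(c), the shift-based forms additionally require is32Bit(c), and constant folding yields int64(int32(c)*int32(d)) instead of c*d. A small hedged illustration, with hypothetical constants, of why the truncation matters:

	package main

	import "fmt"

	func main() {
		c, d := int64(1)<<31, int64(2)
		fmt.Println(c * d)                      // 4294967296: the 64-bit fold used by MUL
		fmt.Println(int64(int32(c) * int32(d))) // 0: the 32-bit fold used by MULW wraps first
	}
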
@@ -5706,105 +6142,143 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               v.AuxInt = int64(int32(c) * int32(d))
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(int32(c) == -1) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVN(v *Value) bool {
+       // match: (MVN (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = ^c
                return true
        }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64NEG(v *Value) bool {
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
        for {
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
+       // match: (NotEqual (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
+       // match: (NotEqual (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (NotEqual (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (NotEqual (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (InvertFlags x))
+       // cond:
+       // result: (NotEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
                v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       return false
+}
+func rewriteValueARM64_OpARM64OR(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR x (MOVDconst [c]))
+       // cond:
+       // result: (ORconst  [c] x)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
@@ -5812,628 +6286,900 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) x)
+       // cond:
+       // result: (ORconst  [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (OR x x)
+       // cond:
+       // result: x
        for {
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (OR x (SLLconst [c] y))
+       // cond:
+       // result: (ORshiftLL  x y [c])
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (OR (SLLconst [c] y) x)
+       // cond:
+       // result: (ORshiftLL  x y [c])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (OR x (SRLconst [c] y))
+       // cond:
+       // result: (ORshiftRL  x y [c])
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (OR (SRLconst [c] y) x)
+       // cond:
+       // result: (ORshiftRL  x y [c])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
                c := v_0.AuxInt
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (OR x (SRAconst [c] y))
+       // cond:
+       // result: (ORshiftRA  x y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
+       // match: (OR (SRAconst [c] y) x)
+       // cond:
+       // result: (ORshiftRA  x y [c])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRAconst {
                        break
                }
                c := v_0.AuxInt
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(int32(c) == 1) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if o0.AuxInt != 8 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               if o1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if s0.AuxInt != 24 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               i3 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if mem != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW  (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               i1 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVN(v *Value) bool {
-       // match: (MVN (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if p != x2.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG(v *Value) bool {
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if mem != x2.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
-       // match: (NotEqual (FlagEQ))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (NotEqual (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               i0 := x3.AuxInt
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if p != x3.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if mem != x3.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (InvertFlags x))
-       // cond:
-       // result: (NotEqual x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64OR(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR  (MOVDconst [c]) x)
-       // cond:
-       // result: (ORconst  [c] x)
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v.Type
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst  [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               i0 := x3.AuxInt
+               s := x3.Aux
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x s:(SLLconst [c] y))
-       // cond: s.Uses == 1 && clobber(s)
-       // result: (ORshiftLL  x y [c])
-       for {
-               x := v.Args[0]
-               s := v.Args[1]
-               if s.Op != OpARM64SLLconst {
+               if o0.AuxInt != 8 {
                        break
                }
-               c := s.AuxInt
-               y := s.Args[0]
-               if !(s.Uses == 1 && clobber(s)) {
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  s:(SLLconst [c] y) x)
-       // cond: s.Uses == 1 && clobber(s)
-       // result: (ORshiftLL  x y [c])
-       for {
-               s := v.Args[0]
-               if s.Op != OpARM64SLLconst {
+               if o1.AuxInt != 16 {
                        break
                }
-               c := s.AuxInt
-               y := s.Args[0]
-               x := v.Args[1]
-               if !(s.Uses == 1 && clobber(s)) {
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SLLconst [c] y))
-       // cond:
-       // result: (ORshiftLL  x y [c])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if s0.AuxInt != 24 {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  (SLLconst [c] y) x)
-       // cond:
-       // result: (ORshiftLL  x y [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SRLconst [c] y))
-       // cond:
-       // result: (ORshiftRL  x y [c])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  (SRLconst [c] y) x)
-       // cond:
-       // result: (ORshiftRL  x y [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               i3 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SRAconst [c] y))
-       // cond:
-       // result: (ORshiftRA  x y [c])
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
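
This case matches four MOVBUload values taken from consecutive offsets i0..i0+3, OR'd together at shifts 0, 8, 16 and 24, and replaces the whole tree with a single MOVWUload at offset i0. A minimal sketch of the kind of source code that can lower to such a tree (the function name is illustrative, not from the commit):

    package main

    import "fmt"

    // load32le builds a little-endian 32-bit value from four adjacent bytes,
    // the sort of OR/shift tree the rule above collapses into one
    // 32-bit unsigned load.
    func load32le(b []byte) uint32 {
        return uint32(b[3])<<24 | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[0])
    }

    func main() {
        fmt.Printf("%#x\n", load32le([]byte{0x78, 0x56, 0x34, 0x12})) // 0x12345678
    }
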
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
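
This case and the next one are a single pattern matched with the outermost OR's two operands in either order: here the ORshiftLL chain is Args[0] and the lone byte load is Args[1], in the next case they are swapped. The conditions are identical in both: the eight byte loads must sit at consecutive offsets (i1 == i0+1 and so on), every intermediate value must have exactly one use so it can be clobbered, and mergePoint must find a block where the combined load can be placed. A toy sketch of the "try both operand orders" idea (illustrative only, not compiler code):

    package main

    import "fmt"

    // matchEitherOrder reports whether pred holds for the two operands of a
    // commutative op in either order, which is what having one generated case
    // per operand order amounts to.
    func matchEitherOrder(a, b string, pred func(x, y string) bool) bool {
        return pred(a, b) || pred(b, a)
    }

    func main() {
        constFirst := func(x, y string) bool { return x == "const" }
        fmt.Println(matchEitherOrder("x", "const", constFirst)) // true
    }
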
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       for {
+               t := v.Type
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               s := x7.Aux
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR  (SRAconst [c] y) x)
-       // cond:
-       // result: (ORshiftRA  x y [c])
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRAconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
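
In this case the byte loaded from the lowest offset i0 is the one shifted into the most significant position (SLLconst [24]), so the four bytes assemble a big-endian 32-bit value; the rewrite therefore emits a 32-bit load followed by a REVW byte reverse. A minimal sketch of the corresponding source idiom (illustrative name, not from the commit):

    package main

    import "fmt"

    // load32be builds a big-endian 32-bit value from four adjacent bytes; the
    // case above turns this shape into one MOVWUload plus a REVW.
    func load32be(b []byte) uint32 {
        return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
    }

    func main() {
        fmt.Printf("%#x\n", load32be([]byte{0x12, 0x34, 0x56, 0x78})) // 0x12345678
    }
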
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]        y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i-3] p) mem)
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
-               o0 := v.Args[0]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               s := x3.Aux
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -6462,10 +7208,16 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
                y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -6474,9 +7226,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6494,9 +7244,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6506,44 +7254,26 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x3.AuxInt != i-3 {
-                       break
-               }
-               if x3.Aux != s {
-                       break
-               }
-               if p != x3.Args[0] {
-                       break
-               }
-               if mem != x3.Args[1] {
-                       break
-               }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i - 3
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))    y5:(MOVDnop x5:(MOVBUload [i-5] {s} p mem)))    y6:(MOVDnop x6:(MOVBUload [i-6] {s} p mem)))    y7:(MOVDnop x7:(MOVBUload [i-7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-7] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                o0 := v.Args[0]
@@ -6603,7 +7333,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -6615,9 +7345,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6635,9 +7363,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6655,9 +7381,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i-3 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -6675,9 +7399,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i-4 {
-                       break
-               }
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -6695,9 +7417,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if x5.AuxInt != i-5 {
-                       break
-               }
+               i5 := x5.AuxInt
                if x5.Aux != s {
                        break
                }
@@ -6715,9 +7435,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x6.AuxInt != i-6 {
-                       break
-               }
+               i6 := x6.AuxInt
                if x6.Aux != s {
                        break
                }
@@ -6735,9 +7453,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               if x7.AuxInt != i-7 {
-                       break
-               }
+               i7 := x7.AuxInt
                if x7.Aux != s {
                        break
                }
@@ -6747,7 +7463,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x7.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -6757,134 +7473,31 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 7
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
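
The 64-bit case above works the same way: the byte at offset i0 is shifted by 56 into the most significant position, so the eight adjacent loads assemble a big-endian value, and the rewrite is a single MOVDload followed by a REV byte reverse. A minimal sketch (illustrative name, not from the commit):

    package main

    import "fmt"

    // load64be builds a big-endian 64-bit value from eight adjacent bytes, the
    // shape the case above rewrites into one MOVDload plus a REV.
    func load64be(b []byte) uint64 {
        return uint64(b[0])<<56 | uint64(b[1])<<48 | uint64(b[2])<<40 | uint64(b[3])<<32 |
            uint64(b[4])<<24 | uint64(b[5])<<16 | uint64(b[6])<<8 | uint64(b[7])
    }

    func main() {
        fmt.Printf("%#x\n", load64be([]byte{1, 2, 3, 4, 5, 6, 7, 8})) // 0x102030405060708
    }
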
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]        y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x1.AuxInt != i+1 {
-                       break
-               }
-               if x1.Aux != s {
-                       break
-               }
-               if p != x1.Args[0] {
-                       break
-               }
-               if mem != x1.Args[1] {
-                       break
-               }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x2.AuxInt != i+2 {
-                       break
-               }
-               if x2.Aux != s {
-                       break
-               }
-               if p != x2.Args[0] {
-                       break
-               }
-               if mem != x2.Args[1] {
-                       break
-               }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x3.AuxInt != i+3 {
-                       break
-               }
-               if x3.Aux != s {
-                       break
-               }
-               if p != x3.Args[0] {
-                       break
-               }
-               if mem != x3.Args[1] {
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i
-               v2.AddArg(p)
-               v1.AddArg(v2)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i+4] {s} p mem)))    y5:(MOVDnop x5:(MOVBUload [i+5] {s} p mem)))    y6:(MOVDnop x6:(MOVBUload [i+6] {s} p mem)))    y7:(MOVDnop x7:(MOVBUload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem))
-       for {
-               t := v.Type
-               o0 := v.Args[0]
+               i7 := x7.AuxInt
+               s := x7.Aux
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -6941,10 +7554,16 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -6953,9 +7572,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6973,9 +7590,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6993,9 +7608,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i+3 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7013,9 +7626,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i+4 {
-                       break
-               }
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7033,9 +7644,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if x5.AuxInt != i+5 {
-                       break
-               }
+               i5 := x5.AuxInt
                if x5.Aux != s {
                        break
                }
@@ -7053,9 +7662,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x6.AuxInt != i+6 {
-                       break
-               }
+               i6 := x6.AuxInt
                if x6.Aux != s {
                        break
                }
@@ -7065,27 +7672,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x7.AuxInt != i+7 {
-                       break
-               }
-               if x7.Aux != s {
-                       break
-               }
-               if p != x7.Args[0] {
-                       break
-               }
-               if mem != x7.Args[1] {
-                       break
-               }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -7095,7 +7682,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
@@ -7105,7 +7692,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7118,7 +7705,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVDconst [-1])
        for {
@@ -7129,7 +7716,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (ORconst  [c] (MOVDconst [d]))
+       // match: (ORconst [c] (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c|d])
        for {
@@ -7143,7 +7730,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
-       // match: (ORconst  [c] (ORconst [d] x))
+       // match: (ORconst [c] (ORconst [d] x))
        // cond:
        // result: (ORconst [c|d] x)
        for {
@@ -7164,7 +7751,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftLL  (MOVDconst [c]) x [d])
+       // match: (ORshiftLL (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SLLconst <x.Type> x [d]))
        for {
@@ -7183,7 +7770,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftLL  x (MOVDconst [c]) [d])
+       // match: (ORshiftLL x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(uint64(c)<<uint64(d))])
        for {
@@ -7199,7 +7786,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL  x y:(SLLconst x [c]) [d])
+       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7221,7 +7808,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: ( ORshiftLL [c] (SRLconst x [64-c]) x)
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
        // cond:
        // result: (RORconst [64-c] x)
        for {
@@ -7242,7 +7829,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
+       // match: (ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
        // cond: c < 32 && t.Size() == 4
        // result: (RORWconst [32-c] x)
        for {
@@ -7271,9 +7858,9 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [8]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1   && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       // cond: i1 == i0+1     && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 8 {
@@ -7287,7 +7874,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7299,9 +7886,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7311,7 +7896,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -7320,15 +7905,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16]                    x0:(MOVHUload [i]   {s} p mem)      y1:(MOVDnop x1:(MOVBUload [i+2] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1   && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y1) && clobber(y2)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       // cond: i2 == i0+2     && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1         && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y1) && clobber(y2)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 24 {
@@ -7345,7 +7930,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVHUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7357,9 +7942,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+2 {
-                       break
-               }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7377,9 +7960,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+3 {
-                       break
-               }
+               i3 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7389,7 +7970,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
@@ -7398,15 +7979,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]              x0:(MOVWUload [i]   {s} p mem)      y1:(MOVDnop x1:(MOVBUload [i+4] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+5] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+6] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1   && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)     && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       // cond: i4 == i0+4     && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1         && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)     && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -7437,7 +8018,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVWUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7449,9 +8030,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+4 {
-                       break
-               }
+               i4 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7469,9 +8048,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+5 {
-                       break
-               }
+               i5 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7489,9 +8066,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i+6 {
-                       break
-               }
+               i6 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7509,9 +8084,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i+7 {
-                       break
-               }
+               i7 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7521,7 +8094,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
@@ -7530,15 +8103,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [8]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       // cond: ((i-1)%2 == 0 || i-1<256 && i-1>-256 && !isArg(s) && !isAuto(s))       && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i-1] {s} p mem))
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && (i0%2 == 0 || i0<256 && i0>-256 && !isArg(s) && !isAuto(s))  && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
        for {
                t := v.Type
                if v.AuxInt != 8 {
@@ -7552,7 +8125,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i1 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7564,9 +8137,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i0 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7576,7 +8147,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(((i-1)%2 == 0 || i-1 < 256 && i-1 > -256 && !isArg(s) && !isAuto(s)) && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(i1 == i0+1 && (i0%2 == 0 || i0 < 256 && i0 > -256 && !isArg(s) && !isAuto(s)) && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -7584,16 +8155,16 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
-               v1.AuxInt = i - 1
+               v1.AuxInt = i0
                v1.Aux = s
                v1.AddArg(p)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16]        y0:(REV16W  x0:(MOVHUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y0) && clobber(y1) && clobber(y2)    && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i-2] p) mem))
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y0) && clobber(y1) && clobber(y2)    && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 24 {
@@ -7614,7 +8185,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVHUload {
                        break
                }
-               i := x0.AuxInt
+               i2 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7626,9 +8197,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7646,9 +8215,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i0 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7658,7 +8225,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
@@ -7668,16 +8235,16 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 2
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]  y0:(REVW    x0:(MOVWUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)      && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-4] p) mem))
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)      && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -7712,7 +8279,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVWUload {
                        break
                }
-               i := x0.AuxInt
+               i4 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7724,9 +8291,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i3 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7744,9 +8309,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7764,9 +8327,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i-3 {
-                       break
-               }
+               i1 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7784,9 +8345,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i-4 {
-                       break
-               }
+               i0 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7796,7 +8355,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
@@ -7806,7 +8365,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 4
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
@@ -7818,7 +8377,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftRA  (MOVDconst [c]) x [d])
+       // match: (ORshiftRA (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SRAconst <x.Type> x [d]))
        for {
@@ -7837,7 +8396,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftRA  x (MOVDconst [c]) [d])
+       // match: (ORshiftRA x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(int64(c)>>uint64(d))])
        for {
@@ -7853,7 +8412,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftRA  x y:(SRAconst x [c]) [d])
+       // match: (ORshiftRA x y:(SRAconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7880,7 +8439,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftRL  (MOVDconst [c]) x [d])
+       // match: (ORshiftRL (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SRLconst <x.Type> x [d]))
        for {
@@ -7899,7 +8458,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftRL  x (MOVDconst [c]) [d])
+       // match: (ORshiftRL x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(uint64(c)>>uint64(d))])
        for {
@@ -7915,7 +8474,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftRL  x y:(SRLconst x [c]) [d])
+       // match: (ORshiftRL x y:(SRLconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7937,7 +8496,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: ( ORshiftRL [c] (SLLconst x [64-c]) x)
+       // match: (ORshiftRL [c] (SLLconst x [64-c]) x)
        // cond:
        // result: (RORconst [   c] x)
        for {
@@ -7958,7 +8517,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
+       // match: (ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
        // cond: c < 32 && t.Size() == 4
        // result: (RORWconst [   c] x)
        for {
@@ -8216,7 +8775,7 @@ func rewriteValueARM64_OpARM64SUB(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64SUBconst(v *Value) bool {
-       // match: (SUBconst [0]  x)
+       // match: (SUBconst [0] x)
        // cond:
        // result: x
        for {
@@ -8433,7 +8992,7 @@ func rewriteValueARM64_OpARM64UDIV(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (UDIV  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (UDIV (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(uint64(c)/uint64(d))])
        for {
@@ -8544,7 +9103,7 @@ func rewriteValueARM64_OpARM64UMOD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (UMOD  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (UMOD (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(uint64(c)%uint64(d))])
        for {
@@ -8620,31 +9179,31 @@ func rewriteValueARM64_OpARM64UMODW(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64XOR(v *Value) bool {
-       // match: (XOR (MOVDconst [c]) x)
+       // match: (XOR x (MOVDconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64XORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVDconst [c]))
+       // match: (XOR (MOVDconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64XORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -8767,7 +9326,7 @@ func rewriteValueARM64_OpARM64XOR(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64XORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -9386,7 +9945,7 @@ func rewriteValueARM64_OpAtomicExchange64(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LDARW ptr mem)
        for {
@@ -9399,7 +9958,7 @@ func rewriteValueARM64_OpAtomicLoad32(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicLoad64(v *Value) bool {
-       // match: (AtomicLoad64  ptr mem)
+       // match: (AtomicLoad64 ptr mem)
        // cond:
        // result: (LDAR  ptr mem)
        for {
@@ -9425,7 +9984,7 @@ func rewriteValueARM64_OpAtomicLoadPtr(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
-       // match: (AtomicOr8  ptr val mem)
+       // match: (AtomicOr8 ptr val mem)
        // cond:
        // result: (LoweredAtomicOr8  ptr val mem)
        for {
@@ -9440,7 +9999,7 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (STLRW ptr val mem)
        for {
@@ -9455,7 +10014,7 @@ func rewriteValueARM64_OpAtomicStore32(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicStore64(v *Value) bool {
-       // match: (AtomicStore64      ptr val mem)
+       // match: (AtomicStore64 ptr val mem)
        // cond:
        // result: (STLR  ptr val mem)
        for {
@@ -11446,7 +12005,7 @@ func rewriteValueARM64_OpLsh16x32(v *Value) bool {
 func rewriteValueARM64_OpLsh16x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x64  x (MOVDconst [c]))
+       // match: (Lsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SLLconst x [c])
        for {
@@ -11464,7 +12023,7 @@ func rewriteValueARM64_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  _ (MOVDconst [c]))
+       // match: (Lsh16x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 16
        // result: (MOVDconst [0])
        for {
@@ -11507,7 +12066,7 @@ func rewriteValueARM64_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11598,7 +12157,7 @@ func rewriteValueARM64_OpLsh32x32(v *Value) bool {
 func rewriteValueARM64_OpLsh32x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x64  x (MOVDconst [c]))
+       // match: (Lsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SLLconst x [c])
        for {
@@ -11616,7 +12175,7 @@ func rewriteValueARM64_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  _ (MOVDconst [c]))
+       // match: (Lsh32x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 32
        // result: (MOVDconst [0])
        for {
@@ -11659,7 +12218,7 @@ func rewriteValueARM64_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11750,7 +12309,7 @@ func rewriteValueARM64_OpLsh64x32(v *Value) bool {
 func rewriteValueARM64_OpLsh64x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x64  x (MOVDconst [c]))
+       // match: (Lsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SLLconst x [c])
        for {
@@ -11768,7 +12327,7 @@ func rewriteValueARM64_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  _ (MOVDconst [c]))
+       // match: (Lsh64x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 64
        // result: (MOVDconst [0])
        for {
@@ -11811,7 +12370,7 @@ func rewriteValueARM64_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11902,7 +12461,7 @@ func rewriteValueARM64_OpLsh8x32(v *Value) bool {
 func rewriteValueARM64_OpLsh8x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x64   x (MOVDconst [c]))
+       // match: (Lsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SLLconst x [c])
        for {
@@ -11920,7 +12479,7 @@ func rewriteValueARM64_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   _ (MOVDconst [c]))
+       // match: (Lsh8x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -11963,7 +12522,7 @@ func rewriteValueARM64_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13076,7 +13635,7 @@ func rewriteValueARM64_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13173,7 +13732,7 @@ func rewriteValueARM64_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  x (MOVDconst [c]))
+       // match: (Rsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SRAconst (SignExt16to64 x) [c])
        for {
@@ -13241,7 +13800,7 @@ func rewriteValueARM64_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  x y)
+       // match: (Rsh16x8 x y)
        // cond:
        // result: (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13404,7 +13963,7 @@ func rewriteValueARM64_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13501,7 +14060,7 @@ func rewriteValueARM64_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  x (MOVDconst [c]))
+       // match: (Rsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SRAconst (SignExt32to64 x) [c])
        for {
@@ -13569,7 +14128,7 @@ func rewriteValueARM64_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  x y)
+       // match: (Rsh32x8 x y)
        // cond:
        // result: (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13722,7 +14281,7 @@ func rewriteValueARM64_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13811,7 +14370,7 @@ func rewriteValueARM64_OpRsh64x32(v *Value) bool {
 func rewriteValueARM64_OpRsh64x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x64  x (MOVDconst [c]))
+       // match: (Rsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SRAconst x [c])
        for {
@@ -13873,7 +14432,7 @@ func rewriteValueARM64_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  x y)
+       // match: (Rsh64x8 x y)
        // cond:
        // result: (SRA x (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13969,7 +14528,7 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  x (MOVDconst [c]))
+       // match: (Rsh8Ux64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRLconst (ZeroExt8to64  x) [c])
        for {
@@ -13989,7 +14548,7 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64  _ (MOVDconst [c]))
+       // match: (Rsh8Ux64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -14034,7 +14593,7 @@ func rewriteValueARM64_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -14131,7 +14690,7 @@ func rewriteValueARM64_OpRsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x64   x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRAconst (SignExt8to64  x) [c])
        for {
@@ -14151,7 +14710,7 @@ func rewriteValueARM64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64  x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (SRAconst (SignExt8to64  x) [63])
        for {
@@ -14199,7 +14758,7 @@ func rewriteValueARM64_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  x y)
+       // match: (Rsh8x8 x y)
        // cond:
        // result: (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
src/cmd/compile/internal/ssa/rewriteMIPS.go
index 19144108e75224c0dca1f714a2ebf099fffda476..1045a4e5d111007e14adfda6c71034c072550100 100644 (file)
@@ -686,7 +686,7 @@ func rewriteValueMIPS_OpAtomicAnd8(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (AtomicAnd8  ptr val mem)
+       // match: (AtomicAnd8 ptr val mem)
        // cond: !config.BigEndian
        // result: (LoweredAtomicAnd (AND <types.UInt32Ptr> (MOVWconst [^3]) ptr)               (OR <types.UInt32> (SLL <types.UInt32> (ZeroExt8to32 val)                       (SLLconst <types.UInt32> [3]                            (ANDconst  <types.UInt32> [3] ptr)))            (NORconst [0] <types.UInt32> (SLL <types.UInt32>                        (MOVWconst [0xff]) (SLLconst <types.UInt32> [3]                                 (ANDconst <types.UInt32> [3] ptr))))) mem)
        for {
@@ -735,7 +735,7 @@ func rewriteValueMIPS_OpAtomicAnd8(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (AtomicAnd8  ptr val mem)
+       // match: (AtomicAnd8 ptr val mem)
        // cond: config.BigEndian
        // result: (LoweredAtomicAnd (AND <types.UInt32Ptr> (MOVWconst [^3]) ptr)               (OR <types.UInt32> (SLL <types.UInt32> (ZeroExt8to32 val)                       (SLLconst <types.UInt32> [3]                            (ANDconst  <types.UInt32> [3]                                   (XORconst <types.UInt32> [3] ptr))))            (NORconst [0] <types.UInt32> (SLL <types.UInt32>                        (MOVWconst [0xff]) (SLLconst <types.UInt32> [3]                                 (ANDconst <types.UInt32> [3]                                    (XORconst <types.UInt32> [3] ptr)))))) mem)
        for {
@@ -825,7 +825,7 @@ func rewriteValueMIPS_OpAtomicExchange32(v *Value) bool {
        }
 }
 func rewriteValueMIPS_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad ptr mem)
        for {
@@ -927,7 +927,7 @@ func rewriteValueMIPS_OpAtomicOr8(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore ptr val mem)
        for {
@@ -2777,31 +2777,31 @@ func rewriteValueMIPS_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValueMIPS_OpMIPSADD(v *Value) bool {
-       // match: (ADD (MOVWconst [c]) x)
+       // match: (ADD x (MOVWconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVWconst [c]))
+       // match: (ADD (MOVWconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2858,7 +2858,7 @@ func rewriteValueMIPS_OpMIPSADDconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDconst [0]  x)
+       // match: (ADDconst [0] x)
        // cond:
        // result: x
        for {
@@ -2922,31 +2922,31 @@ func rewriteValueMIPS_OpMIPSADDconst(v *Value) bool {
 func rewriteValueMIPS_OpMIPSAND(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (AND (MOVWconst [c]) x)
+       // match: (AND x (MOVWconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVWconst [c]))
+       // match: (AND (MOVWconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2993,10 +2993,38 @@ func rewriteValueMIPS_OpMIPSAND(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (AND (SGTUconst [1] y) (SGTUconst [1] x))
+       // cond:
+       // result: (SGTUconst [1] (OR <x.Type> x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSSGTUconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSSGTUconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpMIPSSGTUconst)
+               v.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpMIPSOR, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -3347,7 +3375,7 @@ func rewriteValueMIPS_OpMIPSMOVBUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVBload(v *Value) bool {
-       // match: (MOVBload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -3490,7 +3518,7 @@ func rewriteValueMIPS_OpMIPSMOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVWconst [c]))
+       // match: (MOVBreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [int64(int8(c))])
        for {
@@ -3737,7 +3765,7 @@ func rewriteValueMIPS_OpMIPSMOVBstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVDload(v *Value) bool {
-       // match: (MOVDload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVDload  [off1+off2] {sym} ptr mem)
        for {
@@ -3864,7 +3892,7 @@ func rewriteValueMIPS_OpMIPSMOVDstore(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVFload(v *Value) bool {
-       // match: (MOVFload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVFload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVFload  [off1+off2] {sym} ptr mem)
        for {
@@ -4171,7 +4199,7 @@ func rewriteValueMIPS_OpMIPSMOVHUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVHload(v *Value) bool {
-       // match: (MOVHload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVHload  [off1+off2] {sym} ptr mem)
        for {
@@ -4362,7 +4390,7 @@ func rewriteValueMIPS_OpMIPSMOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVWconst [c]))
+       // match: (MOVHreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [int64(int16(c))])
        for {
@@ -4567,7 +4595,7 @@ func rewriteValueMIPS_OpMIPSMOVHstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVWload(v *Value) bool {
-       // match: (MOVWload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -4652,7 +4680,7 @@ func rewriteValueMIPS_OpMIPSMOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVWconst [c]))
+       // match: (MOVWreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [c])
        for {
@@ -4815,7 +4843,7 @@ func rewriteValueMIPS_OpMIPSMOVWstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
-       // match: (MUL (MOVWconst [0]) _ )
+       // match: (MUL (MOVWconst [0]) _)
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -4830,7 +4858,22 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (MUL (MOVWconst [1]) x )
+       // match: (MUL _ (MOVWconst [0]))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL (MOVWconst [1]) x)
        // cond:
        // result: x
        for {
@@ -4847,7 +4890,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVWconst [-1]) x )
+       // match: (MUL x (MOVWconst [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVWconst [-1]) x)
        // cond:
        // result: (NEG x)
        for {
@@ -4863,7 +4923,23 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVWconst [c]) x )
+       // match: (MUL x (MOVWconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSNEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SLLconst [log2(int64(uint32(c)))] x)
        for {
@@ -4881,6 +4957,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SLLconst [log2(int64(uint32(c)))] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSLLconst)
+               v.AuxInt = log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) (MOVWconst [d]))
        // cond:
        // result: (MOVWconst [int64(int32(c)*int32(d))])
@@ -4899,6 +4993,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AuxInt = int64(int32(c) * int32(d))
                return true
        }
+       // match: (MUL (MOVWconst [d]) (MOVWconst [c]))
+       // cond:
+       // result: (MOVWconst [int64(int32(c)*int32(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSNEG(v *Value) bool {
@@ -4918,31 +5030,31 @@ func rewriteValueMIPS_OpMIPSNEG(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSNOR(v *Value) bool {
-       // match: (NOR (MOVWconst [c]) x)
+       // match: (NOR x (MOVWconst [c]))
        // cond:
        // result: (NORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSNORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (NOR x (MOVWconst [c]))
+       // match: (NOR (MOVWconst [c]) x)
        // cond:
        // result: (NORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSNORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -4970,37 +5082,37 @@ func rewriteValueMIPS_OpMIPSNORconst(v *Value) bool {
 func rewriteValueMIPS_OpMIPSOR(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (OR  (MOVWconst [c]) x)
+       // match: (OR x (MOVWconst [c]))
        // cond:
        // result: (ORconst  [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR  x (MOVWconst [c]))
+       // match: (OR (MOVWconst [c]) x)
        // cond:
        // result: (ORconst  [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR  x x)
+       // match: (OR x x)
        // cond:
        // result: x
        for {
@@ -5034,10 +5146,31 @@ func rewriteValueMIPS_OpMIPSOR(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (OR (SGTUzero y) (SGTUzero x))
+       // cond:
+       // result: (SGTUzero (OR <x.Type> x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSSGTUzero {
+                       break
+               }
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSSGTUzero {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpMIPSSGTUzero)
+               v0 := b.NewValue0(v.Pos, OpMIPSOR, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -5050,7 +5183,7 @@ func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVWconst [-1])
        for {
@@ -5094,7 +5227,7 @@ func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSSGT(v *Value) bool {
-       // match: (SGT  (MOVWconst [c]) x)
+       // match: (SGT (MOVWconst [c]) x)
        // cond:
        // result: (SGTconst  [c] x)
        for {
@@ -5735,7 +5868,7 @@ func rewriteValueMIPS_OpMIPSSUB(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSSUBconst(v *Value) bool {
-       // match: (SUBconst [0]  x)
+       // match: (SUBconst [0] x)
        // cond:
        // result: x
        for {
@@ -5797,31 +5930,31 @@ func rewriteValueMIPS_OpMIPSSUBconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSXOR(v *Value) bool {
-       // match: (XOR (MOVWconst [c]) x)
+       // match: (XOR x (MOVWconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSXORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVWconst [c]))
+       // match: (XOR (MOVWconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSXORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -5842,7 +5975,7 @@ func rewriteValueMIPS_OpMIPSXOR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSXORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7777,7 +7910,33 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [0]) _ ))
+       // match: (Select0 (MULTU (MOVWconst [c]) x))
+       // cond: x.Op != OpMIPSMOVWconst
+       // result: (Select0 (MULTU (MOVWconst [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(x.Op != OpMIPSMOVWconst) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpMIPSMULTU, MakeTuple(types.UInt32, types.UInt32))
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = c
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [0]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -7796,7 +7955,26 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [1]) _ ))
+       // match: (Select0 (MULTU _ (MOVWconst [0])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [1]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -7815,7 +7993,26 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [-1]) x ))
+       // match: (Select0 (MULTU _ (MOVWconst [1])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [-1]) x))
        // cond:
        // result: (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
        for {
@@ -7842,7 +8039,34 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [c]) x ))
+       // match: (Select0 (MULTU x (MOVWconst [-1])))
+       // cond:
+       // result: (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSCMOVZ)
+               v0 := b.NewValue0(v.Pos, OpMIPSADDconst, x.Type)
+               v0.AuxInt = -1
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = 0
+               v.AddArg(v1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [c]) x))
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SRLconst [32-log2(int64(uint32(c)))] x)
        for {
@@ -7864,7 +8088,29 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (MULTU  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select0 (MULTU x (MOVWconst [c])))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SRLconst [32-log2(int64(uint32(c)))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSRLconst)
+               v.AuxInt = 32 - log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [(c*d)>>32])
        for {
@@ -7886,7 +8132,29 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = (c * d) >> 32
                return true
        }
-       // match: (Select0 (DIV  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select0 (MULTU (MOVWconst [d]) (MOVWconst [c])))
+       // cond:
+       // result: (MOVWconst [(c*d)>>32])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = (c * d) >> 32
+               return true
+       }
+       // match: (Select0 (DIV (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(c)%int32(d))])
        for {
@@ -8003,7 +8271,33 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [0]) _ ))
+       // match: (Select1 (MULTU (MOVWconst [c]) x))
+       // cond: x.Op != OpMIPSMOVWconst
+       // result: (Select1 (MULTU (MOVWconst [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(x.Op != OpMIPSMOVWconst) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpMIPSMULTU, MakeTuple(types.UInt32, types.UInt32))
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = c
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [0]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -8022,7 +8316,26 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [1]) x ))
+       // match: (Select1 (MULTU _ (MOVWconst [0])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [1]) x))
        // cond:
        // result: x
        for {
@@ -8043,7 +8356,28 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [-1]) x ))
+       // match: (Select1 (MULTU x (MOVWconst [1])))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [-1]) x))
        // cond:
        // result: (NEG <x.Type> x)
        for {
@@ -8064,7 +8398,28 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [c]) x ))
+       // match: (Select1 (MULTU x (MOVWconst [-1])))
+       // cond:
+       // result: (NEG <x.Type> x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSNEG)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [c]) x))
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SLLconst [log2(int64(uint32(c)))] x)
        for {
@@ -8086,7 +8441,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select1 (MULTU x (MOVWconst [c])))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SLLconst [log2(int64(uint32(c)))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSLLconst)
+               v.AuxInt = log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(uint32(c)*uint32(d)))])
        for {
@@ -8108,7 +8485,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AuxInt = int64(int32(uint32(c) * uint32(d)))
                return true
        }
-       // match: (Select1 (DIV  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select1 (MULTU (MOVWconst [d]) (MOVWconst [c])))
+       // cond:
+       // result: (MOVWconst [int64(int32(uint32(c)*uint32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = int64(int32(uint32(c) * uint32(d)))
+               return true
+       }
+       // match: (Select1 (DIV (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(c)/int32(d))])
        for {
@@ -9182,7 +9581,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVWconst [0]) yes no)
+               // match: (EQ (MOVWconst [0]) yes no)
                // cond:
                // result: (First nil yes no)
                for {
@@ -9201,7 +9600,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVWconst [c]) yes no)
+               // match: (EQ (MOVWconst [c]) yes no)
                // cond: c != 0
                // result: (First nil no yes)
                for {
@@ -9653,7 +10052,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE  (MOVWconst [0]) yes no)
+               // match: (NE (MOVWconst [0]) yes no)
                // cond:
                // result: (First nil no yes)
                for {
@@ -9673,7 +10072,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = yes
                        return true
                }
-               // match: (NE  (MOVWconst [c]) yes no)
+               // match: (NE (MOVWconst [c]) yes no)
                // cond: c != 0
                // result: (First nil yes no)
                for {
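
(The NOR, OR, and XOR hunks above all change to the same shape: the first match arm now pulls the constant out of v.Args[1], and a second arm handles the constant in v.Args[0]. Both arms correspond to a single source rule; a sketch of that line, reconstructed from the match/result comments above rather than quoted from MIPS.rules:

    (XOR x (MOVWconst [c])) -> (XORconst [c] x)

One such line is enough to cover both operand orders in the generated matcher, since XOR commutes.)
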
index e0f16a9f8721f3d4a33640875ca849e8838a333f..74d82003a5f70aa2127639e22d1b9af0562d5c0e 100644 (file)
@@ -2692,7 +2692,7 @@ func rewriteValueMIPS64_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -2816,7 +2816,7 @@ func rewriteValueMIPS64_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -2940,7 +2940,7 @@ func rewriteValueMIPS64_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -3064,7 +3064,7 @@ func rewriteValueMIPS64_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -3092,16 +3092,16 @@ func rewriteValueMIPS64_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValueMIPS64_OpMIPS64ADDV(v *Value) bool {
-       // match: (ADDV (MOVVconst [c]) x)
+       // match: (ADDV x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ADDVconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -3110,16 +3110,16 @@ func rewriteValueMIPS64_OpMIPS64ADDV(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDV x (MOVVconst [c]))
+       // match: (ADDV (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ADDVconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -3179,7 +3179,7 @@ func rewriteValueMIPS64_OpMIPS64ADDVconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDVconst [0]  x)
+       // match: (ADDVconst [0] x)
        // cond:
        // result: x
        for {
@@ -3247,16 +3247,16 @@ func rewriteValueMIPS64_OpMIPS64ADDVconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
-       // match: (AND (MOVVconst [c]) x)
+       // match: (AND x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -3265,16 +3265,16 @@ func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVVconst [c]))
+       // match: (AND (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -3299,7 +3299,7 @@ func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64ANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVVconst [0])
        for {
@@ -3446,7 +3446,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVBload(v *Value) bool {
-       // match: (MOVBload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -3520,7 +3520,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVVconst [c]))
+       // match: (MOVBreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int8(c))])
        for {
@@ -3788,7 +3788,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVDload(v *Value) bool {
-       // match: (MOVDload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVDload  [off1+off2] {sym} ptr mem)
        for {
@@ -3892,7 +3892,7 @@ func rewriteValueMIPS64_OpMIPS64MOVDstore(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVFload(v *Value) bool {
-       // match: (MOVFload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVFload  [off1+off2] {sym} ptr mem)
        for {
@@ -4110,7 +4110,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVHload(v *Value) bool {
-       // match: (MOVHload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVHload  [off1+off2] {sym} ptr mem)
        for {
@@ -4232,7 +4232,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVVconst [c]))
+       // match: (MOVHreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int16(c))])
        for {
@@ -4458,7 +4458,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVVload(v *Value) bool {
-       // match: (MOVVload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVVload  [off1+off2] {sym} ptr mem)
        for {
@@ -4520,7 +4520,7 @@ func rewriteValueMIPS64_OpMIPS64MOVVreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVVreg  (MOVVconst [c]))
+       // match: (MOVVreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [c])
        for {
@@ -4800,7 +4800,7 @@ func rewriteValueMIPS64_OpMIPS64MOVWUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVWload(v *Value) bool {
-       // match: (MOVWload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -4970,7 +4970,7 @@ func rewriteValueMIPS64_OpMIPS64MOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVVconst [c]))
+       // match: (MOVWreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int32(c))])
        for {
@@ -5170,16 +5170,16 @@ func rewriteValueMIPS64_OpMIPS64NEGV(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64NOR(v *Value) bool {
-       // match: (NOR (MOVVconst [c]) x)
+       // match: (NOR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (NORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5188,16 +5188,16 @@ func rewriteValueMIPS64_OpMIPS64NOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (NOR x (MOVVconst [c]))
+       // match: (NOR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (NORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -5226,16 +5226,16 @@ func rewriteValueMIPS64_OpMIPS64NORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
-       // match: (OR  (MOVVconst [c]) x)
+       // match: (OR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ORconst  [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5244,16 +5244,16 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (OR  x (MOVVconst [c]))
+       // match: (OR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ORconst  [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -5262,7 +5262,7 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (OR  x x)
+       // match: (OR x x)
        // cond:
        // result: x
        for {
@@ -5278,7 +5278,7 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -5291,7 +5291,7 @@ func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVVconst [-1])
        for {
@@ -5338,7 +5338,7 @@ func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SGT(v *Value) bool {
-       // match: (SGT  (MOVVconst [c]) x)
+       // match: (SGT (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (SGTconst  [c] x)
        for {
@@ -5902,7 +5902,7 @@ func rewriteValueMIPS64_OpMIPS64SUBV(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SUBVconst(v *Value) bool {
-       // match: (SUBVconst [0]  x)
+       // match: (SUBVconst [0] x)
        // cond:
        // result: x
        for {
@@ -5970,16 +5970,16 @@ func rewriteValueMIPS64_OpMIPS64SUBVconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
-       // match: (XOR (MOVVconst [c]) x)
+       // match: (XOR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5988,16 +5988,16 @@ func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVVconst [c]))
+       // match: (XOR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -6021,7 +6021,7 @@ func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64XORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7352,7 +7352,7 @@ func rewriteValueMIPS64_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64  y)))
        for {
@@ -7484,7 +7484,7 @@ func rewriteValueMIPS64_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -7616,7 +7616,7 @@ func rewriteValueMIPS64_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64  y)))
        for {
@@ -7748,7 +7748,7 @@ func rewriteValueMIPS64_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -7874,7 +7874,7 @@ func rewriteValueMIPS64_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> x (ZeroExt8to64  y)))
        for {
@@ -7998,7 +7998,7 @@ func rewriteValueMIPS64_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -8128,7 +8128,7 @@ func rewriteValueMIPS64_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64  y)))
        for {
@@ -8260,7 +8260,7 @@ func rewriteValueMIPS64_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -8331,7 +8331,7 @@ func rewriteValueMIPS64_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (DIVV  (MOVVconst [c]) (MOVVconst [d])))
+       // match: (Select0 (DIVV (MOVVconst [c]) (MOVVconst [d])))
        // cond:
        // result: (MOVVconst [int64(c)%int64(d)])
        for {
@@ -8398,6 +8398,26 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [-1]) x))
+       // cond:
+       // result: (NEGV x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != -1 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.reset(OpMIPS64NEGV)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU _ (MOVVconst [0])))
        // cond:
        // result: (MOVVconst [0])
@@ -8417,6 +8437,25 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [0]) _))
+       // cond:
+       // result: (MOVVconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [1])))
        // cond:
        // result: x
@@ -8438,6 +8477,27 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [1]) x))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [c])))
        // cond: isPowerOfTwo(c)
        // result: (SLLVconst [log2(c)] x)
@@ -8460,6 +8520,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [c]) x))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLVconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpMIPS64SLLVconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [-1]) x))
        // cond:
        // result: (NEGV x)
@@ -8480,6 +8562,26 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [-1])))
+       // cond:
+       // result: (NEGV x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPS64NEGV)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [0]) _))
        // cond:
        // result: (MOVVconst [0])
@@ -8499,6 +8601,25 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (Select1 (MULVU _ (MOVVconst [0])))
+       // cond:
+       // result: (MOVVconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [1]) x))
        // cond:
        // result: x
@@ -8520,6 +8641,27 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [1])))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [c]) x))
        // cond: isPowerOfTwo(c)
        // result: (SLLVconst [log2(c)] x)
@@ -8542,6 +8684,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [c])))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLVconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpMIPS64SLLVconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (DIVVU x (MOVVconst [1])))
        // cond:
        // result: x
@@ -8607,7 +8771,29 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = c * d
                return true
        }
-       // match: (Select1 (DIVV  (MOVVconst [c]) (MOVVconst [d])))
+       // match: (Select1 (MULVU (MOVVconst [d]) (MOVVconst [c])))
+       // cond:
+       // result: (MOVVconst [c*d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = c * d
+               return true
+       }
+       // match: (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d])))
        // cond:
        // result: (MOVVconst [int64(c)/int64(d)])
        for {
@@ -9834,7 +10020,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVVconst [0]) yes no)
+               // match: (EQ (MOVVconst [0]) yes no)
                // cond:
                // result: (First nil yes no)
                for {
@@ -9853,7 +10039,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVVconst [c]) yes no)
+               // match: (EQ (MOVVconst [c]) yes no)
                // cond: c != 0
                // result: (First nil no yes)
                for {
@@ -10273,7 +10459,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE  (MOVVconst [0]) yes no)
+               // match: (NE (MOVVconst [0]) yes no)
                // cond:
                // result: (First nil no yes)
                for {
@@ -10293,7 +10479,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = yes
                        return true
                }
-               // match: (NE  (MOVVconst [c]) yes no)
+               // match: (NE (MOVVconst [c]) yes no)
                // cond: c != 0
                // result: (First nil yes no)
                for {
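
(For the MULVU strength-reduction pair above, with the constant in either operand position, the underlying rule, reconstructed from the match/cond/result comments as a sketch rather than quoted from MIPS64.rules, is:

    (Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) -> (SLLVconst [log2(c)] x)

As a worked instance, c = 8 gives log2(c) = 3, so Select1, the low 64 bits of the product x*8, becomes (SLLVconst [3] x), i.e. x<<3.)
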
index 785fbd211ff1666dddfaa3fadbef80aa3efee0f2..703989611d6a7d3e5bf3fc1999585f5d8a9c7921 100644 (file)
@@ -598,7 +598,7 @@ func rewriteValuePPC64(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -611,7 +611,7 @@ func rewriteValuePPC64_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -637,7 +637,7 @@ func rewriteValuePPC64_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADD  x y)
        for {
@@ -663,7 +663,7 @@ func rewriteValuePPC64_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -741,7 +741,7 @@ func rewriteValuePPC64_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (AND x y)
        for {
@@ -876,7 +876,7 @@ func rewriteValuePPC64_OpAtomicExchange64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad32 ptr mem)
        for {
@@ -889,7 +889,7 @@ func rewriteValuePPC64_OpAtomicLoad32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicLoad64(v *Value) bool {
-       // match: (AtomicLoad64  ptr mem)
+       // match: (AtomicLoad64 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad64 ptr mem)
        for {
@@ -915,7 +915,7 @@ func rewriteValuePPC64_OpAtomicLoadPtr(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicOr8(v *Value) bool {
-       // match: (AtomicOr8  ptr val mem)
+       // match: (AtomicOr8 ptr val mem)
        // cond:
        // result: (LoweredAtomicOr8  ptr val mem)
        for {
@@ -930,7 +930,7 @@ func rewriteValuePPC64_OpAtomicOr8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore32 ptr val mem)
        for {
@@ -945,7 +945,7 @@ func rewriteValuePPC64_OpAtomicStore32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicStore64(v *Value) bool {
-       // match: (AtomicStore64      ptr val mem)
+       // match: (AtomicStore64 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore64 ptr val mem)
        for {
@@ -1035,7 +1035,7 @@ func rewriteValuePPC64_OpCom64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOR x x)
        for {
@@ -1047,7 +1047,7 @@ func rewriteValuePPC64_OpCom8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1058,7 +1058,7 @@ func rewriteValuePPC64_OpConst16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1080,7 +1080,7 @@ func rewriteValuePPC64_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1102,7 +1102,7 @@ func rewriteValuePPC64_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1320,7 +1320,7 @@ func rewriteValuePPC64_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  (SignExt16to32 x) (SignExt16to32 y))
        for {
@@ -1358,7 +1358,7 @@ func rewriteValuePPC64_OpDiv16u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpDiv32(v *Value) bool {
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVW  x y)
        for {
@@ -1397,7 +1397,7 @@ func rewriteValuePPC64_OpDiv32u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpDiv64(v *Value) bool {
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (DIVD  x y)
        for {
@@ -1440,7 +1440,7 @@ func rewriteValuePPC64_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (SignExt8to32 x) (SignExt8to32 y))
        for {
@@ -1461,7 +1461,7 @@ func rewriteValuePPC64_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (ZeroExt8to32 x) (ZeroExt8to32 y))
        for {
@@ -2066,7 +2066,7 @@ func rewriteValuePPC64_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (MULHW  x y)
        for {
@@ -2079,7 +2079,7 @@ func rewriteValuePPC64_OpHmul32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul32u(v *Value) bool {
-       // match: (Hmul32u  x y)
+       // match: (Hmul32u x y)
        // cond:
        // result: (MULHWU x y)
        for {
@@ -2092,7 +2092,7 @@ func rewriteValuePPC64_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (MULHD  x y)
        for {
@@ -2105,7 +2105,7 @@ func rewriteValuePPC64_OpHmul64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul64u(v *Value) bool {
-       // match: (Hmul64u  x y)
+       // match: (Hmul64u x y)
        // cond:
        // result: (MULHDU x y)
        for {
@@ -2761,7 +2761,7 @@ func rewriteValuePPC64_OpLsh16x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x32  x (Const64 [c]))
+       // match: (Lsh16x32 x (Const64 [c]))
        // cond: uint32(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2779,7 +2779,7 @@ func rewriteValuePPC64_OpLsh16x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x32  x (MOVDconst [c]))
+       // match: (Lsh16x32 x (MOVDconst [c]))
        // cond: uint32(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2824,7 +2824,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x64  x (Const64 [c]))
+       // match: (Lsh16x64 x (Const64 [c]))
        // cond: uint64(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2842,7 +2842,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  _ (Const64 [c]))
+       // match: (Lsh16x64 _ (Const64 [c]))
        // cond: uint64(c) >= 16
        // result: (MOVDconst [0])
        for {
@@ -2858,7 +2858,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh16x64  x (MOVDconst [c]))
+       // match: (Lsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2955,7 +2955,7 @@ func rewriteValuePPC64_OpLsh32x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x32  x (Const64 [c]))
+       // match: (Lsh32x32 x (Const64 [c]))
        // cond: uint32(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -2973,7 +2973,7 @@ func rewriteValuePPC64_OpLsh32x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x32  x (MOVDconst [c]))
+       // match: (Lsh32x32 x (MOVDconst [c]))
        // cond: uint32(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3018,7 +3018,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x64  x (Const64 [c]))
+       // match: (Lsh32x64 x (Const64 [c]))
        // cond: uint64(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3036,7 +3036,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  _ (Const64 [c]))
+       // match: (Lsh32x64 _ (Const64 [c]))
        // cond: uint64(c) >= 32
        // result: (MOVDconst [0])
        for {
@@ -3052,7 +3052,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh32x64  x (MOVDconst [c]))
+       // match: (Lsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3149,7 +3149,7 @@ func rewriteValuePPC64_OpLsh64x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x32  x (Const64 [c]))
+       // match: (Lsh64x32 x (Const64 [c]))
        // cond: uint32(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3167,7 +3167,7 @@ func rewriteValuePPC64_OpLsh64x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x32  x (MOVDconst [c]))
+       // match: (Lsh64x32 x (MOVDconst [c]))
        // cond: uint32(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3212,7 +3212,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x64  x (Const64 [c]))
+       // match: (Lsh64x64 x (Const64 [c]))
        // cond: uint64(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3230,7 +3230,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  _ (Const64 [c]))
+       // match: (Lsh64x64 _ (Const64 [c]))
        // cond: uint64(c) >= 64
        // result: (MOVDconst [0])
        for {
@@ -3246,7 +3246,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh64x64  x (MOVDconst [c]))
+       // match: (Lsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3343,7 +3343,7 @@ func rewriteValuePPC64_OpLsh8x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x32   x (Const64 [c]))
+       // match: (Lsh8x32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3361,7 +3361,7 @@ func rewriteValuePPC64_OpLsh8x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x32   x (MOVDconst [c]))
+       // match: (Lsh8x32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3406,7 +3406,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x64   x (Const64 [c]))
+       // match: (Lsh8x64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3424,7 +3424,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   _ (Const64 [c]))
+       // match: (Lsh8x64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -3440,7 +3440,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64   x (MOVDconst [c]))
+       // match: (Lsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -4040,7 +4040,7 @@ func rewriteValuePPC64_OpMove(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULLW x y)
        for {
@@ -4053,7 +4053,7 @@ func rewriteValuePPC64_OpMul16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4079,7 +4079,7 @@ func rewriteValuePPC64_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULLD  x y)
        for {
@@ -4105,7 +4105,7 @@ func rewriteValuePPC64_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULLW x y)
        for {
@@ -4118,7 +4118,7 @@ func rewriteValuePPC64_OpMul8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4129,7 +4129,7 @@ func rewriteValuePPC64_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4151,7 +4151,7 @@ func rewriteValuePPC64_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4173,7 +4173,7 @@ func rewriteValuePPC64_OpNeg64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4451,7 +4451,7 @@ func rewriteValuePPC64_OpOr64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (OR x y)
        for {
@@ -4477,16 +4477,16 @@ func rewriteValuePPC64_OpOrB(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
-       // match: (ADD (MOVDconst [c]) x)
+       // match: (ADD x (MOVDconst [c]))
        // cond: is32Bit(c)
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpPPC64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -4495,16 +4495,16 @@ func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVDconst [c]))
+       // match: (ADD (MOVDconst [c]) x)
        // cond: is32Bit(c)
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpPPC64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -4587,6 +4587,24 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (AND (NOR y y) x)
+       // cond:
+       // result: (ANDN x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64NOR {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpPPC64ANDN)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (AND (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c&d])
@@ -4605,6 +4623,24 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
+       // match: (AND (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c&d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c & d
+               return true
+       }
        // match: (AND x (MOVDconst [c]))
        // cond: isU16Bit(c)
        // result: (ANDconst [c] x)
@@ -4677,6 +4713,42 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (AND x:(MOVBZload _ _) (MOVDconst [c]))
+       // cond:
+       // result: (ANDconst [c&0xFF] x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpPPC64MOVBZload {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64ANDconst)
+               v.AuxInt = c & 0xFF
+               v.AddArg(x)
+               return true
+       }
+       // match: (AND (MOVDconst [c]) x:(MOVBZload _ _))
+       // cond:
+       // result: (ANDconst [c&0xFF] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if x.Op != OpPPC64MOVBZload {
+                       break
+               }
+               v.reset(OpPPC64ANDconst)
+               v.AuxInt = c & 0xFF
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool {
@@ -5307,23 +5379,6 @@ func rewriteValuePPC64_OpPPC64Equal(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpPPC64FADD(v *Value) bool {
-       // match: (FADD z (FMUL x y))
-       // cond:
-       // result: (FMADD x y z)
-       for {
-               z := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpPPC64FMUL {
-                       break
-               }
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpPPC64FMADD)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(z)
-               return true
-       }
        // match: (FADD (FMUL x y) z)
        // cond:
        // result: (FMADD x y z)
@@ -5341,26 +5396,26 @@ func rewriteValuePPC64_OpPPC64FADD(v *Value) bool {
                v.AddArg(z)
                return true
        }
-       return false
-}
-func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
-       // match: (FADDS z (FMULS x y))
+       // match: (FADD z (FMUL x y))
        // cond:
-       // result: (FMADDS x y z)
+       // result: (FMADD x y z)
        for {
                z := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpPPC64FMULS {
+               if v_1.Op != OpPPC64FMUL {
                        break
                }
                x := v_1.Args[0]
                y := v_1.Args[1]
-               v.reset(OpPPC64FMADDS)
+               v.reset(OpPPC64FMADD)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(z)
                return true
        }
+       return false
+}
+func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
        // match: (FADDS (FMULS x y) z)
        // cond:
        // result: (FMADDS x y z)
@@ -5378,6 +5433,23 @@ func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
                v.AddArg(z)
                return true
        }
+       // match: (FADDS z (FMULS x y))
+       // cond:
+       // result: (FMADDS x y z)
+       for {
+               z := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64FMULS {
+                       break
+               }
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpPPC64FMADDS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64FMOVDload(v *Value) bool {
@@ -6891,6 +6963,27 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (MOVWZreg y:(AND _ (MOVDconst [c])))
+       // cond: uint64(c) <= 0xFFFFFFFF
+       // result: y
+       for {
+               y := v.Args[0]
+               if y.Op != OpPPC64AND {
+                       break
+               }
+               y_1 := y.Args[1]
+               if y_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := y_1.AuxInt
+               if !(uint64(c) <= 0xFFFFFFFF) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (MOVWZreg y:(MOVWZreg _))
        // cond:
        // result: y
@@ -7034,6 +7127,27 @@ func rewriteValuePPC64_OpPPC64MOVWreg(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (MOVWreg y:(AND _ (MOVDconst [c])))
+       // cond: uint64(c) <= 0x7FFFFFFF
+       // result: y
+       for {
+               y := v.Args[0]
+               if y.Op != OpPPC64AND {
+                       break
+               }
+               y_1 := y.Args[1]
+               if y_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := y_1.AuxInt
+               if !(uint64(c) <= 0x7FFFFFFF) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (MOVWreg y:(MOVWreg _))
        // cond:
        // result: y
@@ -7352,6 +7466,24 @@ func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
+       // match: (OR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
        // match: (OR x (MOVDconst [c]))
        // cond: isU32Bit(c)
        // result: (ORconst [c] x)
@@ -7493,6 +7625,24 @@ func rewriteValuePPC64_OpPPC64XOR(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
+       // match: (XOR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c^d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c ^ d
+               return true
+       }
        // match: (XOR x (MOVDconst [c]))
        // cond: isU32Bit(c)
        // result: (XORconst [c] x)
@@ -7829,7 +7979,7 @@ func rewriteValuePPC64_OpRsh16x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x32  x (Const64 [c]))
+       // match: (Rsh16x32 x (Const64 [c]))
        // cond: uint32(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7849,7 +7999,7 @@ func rewriteValuePPC64_OpRsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x32  x (MOVDconst [c]))
+       // match: (Rsh16x32 x (MOVDconst [c]))
        // cond: uint32(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7898,7 +8048,7 @@ func rewriteValuePPC64_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  x (Const64 [c]))
+       // match: (Rsh16x64 x (Const64 [c]))
        // cond: uint64(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7938,7 +8088,7 @@ func rewriteValuePPC64_OpRsh16x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x64  x (MOVDconst [c]))
+       // match: (Rsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -8235,7 +8385,7 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x32  x (Const64 [c]))
+       // match: (Rsh32x32 x (Const64 [c]))
        // cond: uint32(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8253,7 +8403,7 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x32  x (MOVDconst [c]))
+       // match: (Rsh32x32 x (MOVDconst [c]))
        // cond: uint32(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8298,7 +8448,7 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  x (Const64 [c]))
+       // match: (Rsh32x64 x (Const64 [c]))
        // cond: uint64(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8334,7 +8484,7 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x64  x (MOVDconst [c]))
+       // match: (Rsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8625,7 +8775,7 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x32  x (Const64 [c]))
+       // match: (Rsh64x32 x (Const64 [c]))
        // cond: uint32(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8643,7 +8793,7 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x32  x (MOVDconst [c]))
+       // match: (Rsh64x32 x (MOVDconst [c]))
        // cond: uint32(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8688,7 +8838,7 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x64  x (Const64 [c]))
+       // match: (Rsh64x64 x (Const64 [c]))
        // cond: uint64(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8724,7 +8874,7 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x64  x (MOVDconst [c]))
+       // match: (Rsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8823,7 +8973,7 @@ func rewriteValuePPC64_OpRsh8Ux32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux32  x (Const64 [c]))
+       // match: (Rsh8Ux32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8843,7 +8993,7 @@ func rewriteValuePPC64_OpRsh8Ux32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32  x (MOVDconst [c]))
+       // match: (Rsh8Ux32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8892,7 +9042,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  x (Const64 [c]))
+       // match: (Rsh8Ux64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8912,7 +9062,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64  _ (Const64 [c]))
+       // match: (Rsh8Ux64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -8928,7 +9078,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  x (MOVDconst [c]))
+       // match: (Rsh8Ux64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -9033,7 +9183,7 @@ func rewriteValuePPC64_OpRsh8x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x32   x (Const64 [c]))
+       // match: (Rsh8x32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9053,7 +9203,7 @@ func rewriteValuePPC64_OpRsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32   x (MOVDconst [c]))
+       // match: (Rsh8x32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9102,7 +9252,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x64   x (Const64 [c]))
+       // match: (Rsh8x64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9122,7 +9272,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64  x (Const64 [c]))
+       // match: (Rsh8x64 x (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (SRAWconst (SignExt8to32  x) [63])
        for {
@@ -9142,7 +9292,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64   x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9247,7 +9397,7 @@ func rewriteValuePPC64_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9258,7 +9408,7 @@ func rewriteValuePPC64_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9269,7 +9419,7 @@ func rewriteValuePPC64_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9445,7 +9595,7 @@ func rewriteValuePPC64_OpStore(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9458,7 +9608,7 @@ func rewriteValuePPC64_OpSub16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9484,7 +9634,7 @@ func rewriteValuePPC64_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUB  x y)
        for {
@@ -9510,7 +9660,7 @@ func rewriteValuePPC64_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9536,7 +9686,7 @@ func rewriteValuePPC64_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9558,7 +9708,7 @@ func rewriteValuePPC64_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9591,7 +9741,7 @@ func rewriteValuePPC64_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9641,7 +9791,7 @@ func rewriteValuePPC64_OpXor64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XOR x y)
        for {
@@ -10042,7 +10192,7 @@ func rewriteValuePPC64_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -10053,7 +10203,7 @@ func rewriteValuePPC64_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -10064,7 +10214,7 @@ func rewriteValuePPC64_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBZreg x)
        for {
index 6740fe4cad971bd6c02fb0ffc86804909293542e..72caf9405da1074e5f575e740b9942f0f6ee90da 100644
@@ -716,7 +716,7 @@ func rewriteValueS390X(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -729,7 +729,7 @@ func rewriteValueS390X_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -755,7 +755,7 @@ func rewriteValueS390X_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADD  x y)
        for {
@@ -781,7 +781,7 @@ func rewriteValueS390X_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -859,7 +859,7 @@ func rewriteValueS390X_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDW x y)
        for {
@@ -1191,7 +1191,7 @@ func rewriteValueS390X_OpCom64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTW x)
        for {
@@ -1202,7 +1202,7 @@ func rewriteValueS390X_OpCom8(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1213,7 +1213,7 @@ func rewriteValueS390X_OpConst16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1235,7 +1235,7 @@ func rewriteValueS390X_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1257,7 +1257,7 @@ func rewriteValueS390X_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1478,7 +1478,7 @@ func rewriteValueS390X_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  (MOVHreg x) (MOVHreg y))
        for {
@@ -1520,7 +1520,7 @@ func rewriteValueS390X_OpDiv32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVW  (MOVWreg x) y)
        for {
@@ -1567,7 +1567,7 @@ func rewriteValueS390X_OpDiv32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpDiv64(v *Value) bool {
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (DIVD  x y)
        for {
@@ -1610,7 +1610,7 @@ func rewriteValueS390X_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (MOVBreg x) (MOVBreg y))
        for {
@@ -1631,7 +1631,7 @@ func rewriteValueS390X_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (MOVBZreg x) (MOVBZreg y))
        for {
@@ -1652,7 +1652,7 @@ func rewriteValueS390X_OpEq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -1681,7 +1681,7 @@ func rewriteValueS390X_OpEq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -1731,7 +1731,7 @@ func rewriteValueS390X_OpEq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq64  x y)
+       // match: (Eq64 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -1781,7 +1781,7 @@ func rewriteValueS390X_OpEq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -1810,7 +1810,7 @@ func rewriteValueS390X_OpEqB(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -1864,7 +1864,7 @@ func rewriteValueS390X_OpGeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -1922,7 +1922,7 @@ func rewriteValueS390X_OpGeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -1997,7 +1997,7 @@ func rewriteValueS390X_OpGeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq64  x y)
+       // match: (Geq64 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2072,7 +2072,7 @@ func rewriteValueS390X_OpGeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2101,7 +2101,7 @@ func rewriteValueS390X_OpGeq8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2150,7 +2150,7 @@ func rewriteValueS390X_OpGreater16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2208,7 +2208,7 @@ func rewriteValueS390X_OpGreater32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2283,7 +2283,7 @@ func rewriteValueS390X_OpGreater64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater64  x y)
+       // match: (Greater64 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2358,7 +2358,7 @@ func rewriteValueS390X_OpGreater8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2387,7 +2387,7 @@ func rewriteValueS390X_OpGreater8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2416,7 +2416,7 @@ func rewriteValueS390X_OpHmul32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
        for {
@@ -2460,7 +2460,7 @@ func rewriteValueS390X_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (MULHD  x y)
        for {
@@ -2597,7 +2597,7 @@ func rewriteValueS390X_OpLeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2655,7 +2655,7 @@ func rewriteValueS390X_OpLeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2730,7 +2730,7 @@ func rewriteValueS390X_OpLeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq64  x y)
+       // match: (Leq64 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2805,7 +2805,7 @@ func rewriteValueS390X_OpLeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2834,7 +2834,7 @@ func rewriteValueS390X_OpLeq8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2863,7 +2863,7 @@ func rewriteValueS390X_OpLess16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2921,7 +2921,7 @@ func rewriteValueS390X_OpLess32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2996,7 +2996,7 @@ func rewriteValueS390X_OpLess64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less64  x y)
+       // match: (Less64 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -3071,7 +3071,7 @@ func rewriteValueS390X_OpLess8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -3100,7 +3100,7 @@ func rewriteValueS390X_OpLess8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -3343,7 +3343,7 @@ func rewriteValueS390X_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3447,7 +3447,7 @@ func rewriteValueS390X_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3551,7 +3551,7 @@ func rewriteValueS390X_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
        for {
@@ -3655,7 +3655,7 @@ func rewriteValueS390X_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3683,7 +3683,7 @@ func rewriteValueS390X_OpMod16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (MODW  (MOVHreg x) (MOVHreg y))
        for {
@@ -3725,7 +3725,7 @@ func rewriteValueS390X_OpMod32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (MODW  (MOVWreg x) y)
        for {
@@ -3759,7 +3759,7 @@ func rewriteValueS390X_OpMod32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMod64(v *Value) bool {
-       // match: (Mod64  x y)
+       // match: (Mod64 x y)
        // cond:
        // result: (MODD  x y)
        for {
@@ -3789,7 +3789,7 @@ func rewriteValueS390X_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (MODW  (MOVBreg x) (MOVBreg y))
        for {
@@ -3810,7 +3810,7 @@ func rewriteValueS390X_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (MODWU (MOVBZreg x) (MOVBZreg y))
        for {
@@ -4233,7 +4233,7 @@ func rewriteValueS390X_OpMove(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4246,7 +4246,7 @@ func rewriteValueS390X_OpMul16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4272,7 +4272,7 @@ func rewriteValueS390X_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULLD  x y)
        for {
@@ -4298,7 +4298,7 @@ func rewriteValueS390X_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4315,7 +4315,7 @@ func rewriteValueS390X_OpNeg16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGW (MOVHreg x))
        for {
@@ -4328,7 +4328,7 @@ func rewriteValueS390X_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGW x)
        for {
@@ -4350,7 +4350,7 @@ func rewriteValueS390X_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4376,7 +4376,7 @@ func rewriteValueS390X_OpNeg8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGW (MOVBreg x))
        for {
@@ -4393,7 +4393,7 @@ func rewriteValueS390X_OpNeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -4422,7 +4422,7 @@ func rewriteValueS390X_OpNeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -4472,7 +4472,7 @@ func rewriteValueS390X_OpNeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq64  x y)
+       // match: (Neq64 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -4522,7 +4522,7 @@ func rewriteValueS390X_OpNeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -4551,7 +4551,7 @@ func rewriteValueS390X_OpNeqB(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -4712,7 +4712,7 @@ func rewriteValueS390X_OpOr64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORW x y)
        for {
@@ -4850,7 +4850,7 @@ func rewriteValueS390X_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [15])))
        for {
@@ -4975,7 +4975,7 @@ func rewriteValueS390X_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SRAW <t> (MOVHreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [15])))))
        for {
@@ -5084,7 +5084,7 @@ func rewriteValueS390X_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -5197,7 +5197,7 @@ func rewriteValueS390X_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SRAW <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [31])))))
        for {
@@ -5304,7 +5304,7 @@ func rewriteValueS390X_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
        for {
@@ -5417,7 +5417,7 @@ func rewriteValueS390X_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SRAD <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [63])))))
        for {
@@ -5534,7 +5534,7 @@ func rewriteValueS390X_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [7])))
        for {
@@ -5659,7 +5659,7 @@ func rewriteValueS390X_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [7])))))
        for {
@@ -5724,9 +5724,9 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
+       // match: (ADD (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLDconst {
@@ -5738,10 +5738,11 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                if v_1.Op != OpS390XSRDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
@@ -5749,28 +5750,29 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (ADD (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRDconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -5796,7 +5798,7 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADD (MOVDaddr [c] {s} x) y)
+       // match: (ADD (MOVDaddr [c] {s} y) x)
        // cond: x.Op != OpSB && y.Op != OpSB
        // result: (MOVDaddridx [c] {s} x y)
        for {
@@ -5806,8 +5808,8 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
@@ -5833,6 +5835,21 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADD (NEG y) x)
+       // cond:
+       // result: (SUB x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XNEG {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpS390XSUB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADD <t> x g:(MOVDload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDload <t> [off] {sym} x ptr mem)
@@ -5885,6 +5902,58 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XADDW(v *Value) bool {
@@ -5918,9 +5987,9 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
+       // match: (ADDW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLWconst {
@@ -5932,10 +6001,11 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                if v_1.Op != OpS390XSRWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
@@ -5943,28 +6013,29 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
+       // match: (ADDW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
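The rotate rules above were reshaped so the second shift amount d is captured and checked in the condition (d == 32-c) instead of being matched literally as 32-c, and both orderings now produce (RLLconst [c] x). A quick self-contained check of the underlying identity (illustrative only, not compiler code):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// (ADDW (SLWconst x [c]) (SRWconst x [d])) with d == 32-c is a left
	// rotate: the two shifted values have no overlapping bits, so their
	// sum equals bits.RotateLeft32(x, c).
	x := uint32(0x12345678)
	ok := true
	for c := 1; c < 32; c++ {
		if x<<uint(c)+x>>uint(32-c) != bits.RotateLeft32(x, c) {
			ok = false
		}
	}
	fmt.Println(ok) // true
}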
@@ -5983,6 +6054,21 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDW (NEGW y) x)
+       // cond:
+       // result: (SUBW x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XNEGW {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpS390XSUBW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDW <t> x g:(MOVWload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDWload <t> [off] {sym} x ptr mem)
@@ -6035,6 +6121,58 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADDW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ADDW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDWload <t> [off] {sym} x ptr mem)
@@ -6087,6 +6225,58 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XADDWconst(v *Value) bool {
@@ -6290,22 +6480,6 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFF]) x)
-       // cond:
-       // result: (MOVBZreg x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
-                       break
-               }
-               if v_0.AuxInt != 0xFF {
-                       break
-               }
-               x := v.Args[1]
-               v.reset(OpS390XMOVBZreg)
-               v.AddArg(x)
-               return true
-       }
        // match: (AND x (MOVDconst [0xFF]))
        // cond:
        // result: (MOVBZreg x)
@@ -6322,19 +6496,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFFFF]) x)
+       // match: (AND (MOVDconst [0xFF]) x)
        // cond:
-       // result: (MOVHZreg x)
+       // result: (MOVBZreg x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDconst {
                        break
                }
-               if v_0.AuxInt != 0xFFFF {
+               if v_0.AuxInt != 0xFF {
                        break
                }
                x := v.Args[1]
-               v.reset(OpS390XMOVHZreg)
+               v.reset(OpS390XMOVBZreg)
                v.AddArg(x)
                return true
        }
@@ -6354,19 +6528,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFFFFFFFF]) x)
+       // match: (AND (MOVDconst [0xFFFF]) x)
        // cond:
-       // result: (MOVWZreg x)
+       // result: (MOVHZreg x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDconst {
                        break
                }
-               if v_0.AuxInt != 0xFFFFFFFF {
+               if v_0.AuxInt != 0xFFFF {
                        break
                }
                x := v.Args[1]
-               v.reset(OpS390XMOVWZreg)
+               v.reset(OpS390XMOVHZreg)
                v.AddArg(x)
                return true
        }
@@ -6386,6 +6560,22 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (AND (MOVDconst [0xFFFFFFFF]) x)
+       // cond:
+       // result: (MOVWZreg x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0xFFFFFFFF {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpS390XMOVWZreg)
+               v.AddArg(x)
+               return true
+       }
        // match: (AND (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c&d])
@@ -6404,6 +6594,24 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
+       // match: (AND (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c&d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c & d
+               return true
+       }
        // match: (AND x x)
        // cond:
        // result: x
@@ -6469,6 +6677,58 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (AND <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (AND <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
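The mask rules above (0xFF, 0xFFFF, 0xFFFFFFFF) and their newly generated commuted twins rely on masking being the same operation as a zero-extending move. A small self-contained check (illustrative only, not compiler code):

package main

import "fmt"

func main() {
	// ANDing with a low-byte/halfword/word mask is exactly a zero
	// extension of those low bits, which is why the rules select
	// MOVBZreg, MOVHZreg and MOVWZreg respectively.
	x := uint64(0xCAFEBABEDEADBEEF)
	fmt.Println(x&0xFF == uint64(uint8(x)))        // MOVBZreg
	fmt.Println(x&0xFFFF == uint64(uint16(x)))     // MOVHZreg
	fmt.Println(x&0xFFFFFFFF == uint64(uint32(x))) // MOVWZreg
}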
 func rewriteValueS390X_OpS390XANDW(v *Value) bool {
@@ -6567,6 +6827,58 @@ func rewriteValueS390X_OpS390XANDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ANDW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ANDW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ANDWload <t> [off] {sym} x ptr mem)
@@ -6619,6 +6931,58 @@ func rewriteValueS390X_OpS390XANDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XANDWconst(v *Value) bool {
@@ -7235,23 +7599,6 @@ func rewriteValueS390X_OpS390XCMPconst(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XFADD(v *Value) bool {
-       // match: (FADD x (FMUL y z))
-       // cond:
-       // result: (FMADD x y z)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XFMUL {
-                       break
-               }
-               y := v_1.Args[0]
-               z := v_1.Args[1]
-               v.reset(OpS390XFMADD)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(z)
-               return true
-       }
        // match: (FADD (FMUL y z) x)
        // cond:
        // result: (FMADD x y z)
@@ -7269,26 +7616,26 @@ func rewriteValueS390X_OpS390XFADD(v *Value) bool {
                v.AddArg(z)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
-       // match: (FADDS x (FMULS y z))
+       // match: (FADD x (FMUL y z))
        // cond:
-       // result: (FMADDS x y z)
+       // result: (FMADD x y z)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpS390XFMULS {
+               if v_1.Op != OpS390XFMUL {
                        break
                }
                y := v_1.Args[0]
                z := v_1.Args[1]
-               v.reset(OpS390XFMADDS)
+               v.reset(OpS390XFMADD)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(z)
                return true
        }
+       return false
+}
+func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
        // match: (FADDS (FMULS y z) x)
        // cond:
        // result: (FMADDS x y z)
@@ -7306,10 +7653,27 @@ func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
                v.AddArg(z)
                return true
        }
+       // match: (FADDS x (FMULS y z))
+       // cond:
+       // result: (FMADDS x y z)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XFMULS {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               v.reset(OpS390XFMADDS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
        return false
 }
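The FADD/FADDS rules were reordered rather than duplicated: because the adds commute, a single source rule over (FADD (FMUL y z) x) covers both orderings once expanded, and both forms fold to the same FMADD x y z. A tiny stand-in showing the operation the fused instruction performs (illustrative names, plain Go rather than the hardware FMADD):

package main

import "fmt"

// fmadd computes x + y*z, the value an s390x FMADD produces in a single
// instruction; this Go version is an ordinary multiply and add.
func fmadd(x, y, z float64) float64 { return x + y*z }

func main() {
	fmt.Println(fmadd(1, 2, 3)) // 7
}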
 func rewriteValueS390X_OpS390XFMOVDload(v *Value) bool {
-       // match: (FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (FMOVDload [off1+off2] {sym} ptr mem)
        for {
@@ -7615,7 +7979,7 @@ func rewriteValueS390X_OpS390XFMOVDstoreidx(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XFMOVSload(v *Value) bool {
-       // match: (FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (FMOVSload [off1+off2] {sym} ptr mem)
        for {
@@ -8015,7 +8379,7 @@ func rewriteValueS390X_OpS390XMOVBZload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBZload [off1+off2] {sym} ptr mem)
        for {
@@ -8038,7 +8402,7 @@ func rewriteValueS390X_OpS390XMOVBZload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // match: (MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -8137,6 +8501,28 @@ func rewriteValueS390X_OpS390XMOVBZloadidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVBZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
        // cond:
        // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
@@ -8159,20 +8545,42 @@ func rewriteValueS390X_OpS390XMOVBZloadidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
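The new commuted MOVBZloadidx variants fold an ADDconst on either address operand into the displacement. The arithmetic they rely on, as a self-contained check (illustrative only):

package main

import "fmt"

func main() {
	// The effective address c + ptr + (idx + d) equals (c + d) + ptr + idx,
	// whichever operand carried the constant, so the constant moves into
	// the AuxInt displacement.
	var ptr, idx, c, d int64 = 0x1000, 0x20, 8, 4
	fmt.Println(c+ptr+(idx+d) == (c+d)+ptr+idx) // true
}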
-       return false
-}
-func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
-       // cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-       // result: (MOVDreg x)
+       // match: (MOVBZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDLT {
-                       break
-               }
-               x_0 := x.Args[0]
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVBZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
+       // cond: int64(uint8(c)) == c && int64(uint8(d)) == d
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDLT {
+                       break
+               }
+               x_0 := x.Args[0]
                if x_0.Op != OpS390XMOVDconst {
                        break
                }
@@ -8472,7 +8880,7 @@ func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XMOVBload(v *Value) bool {
-       // match: (MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -8647,7 +9055,7 @@ func rewriteValueS390X_OpS390XMOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -8695,7 +9103,7 @@ func rewriteValueS390X_OpS390XMOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -9245,6 +9653,30 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVBstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
        // cond:
        // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
@@ -9269,6 +9701,30 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVBstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
        // result: (MOVHstoreidx [i-1] {s} p idx w mem)
@@ -9317,20 +9773,63 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9351,7 +9850,7 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if x_2.AuxInt != j+8 {
+               if x_2.AuxInt != 8 {
                        break
                }
                if w != x_2.Args[0] {
@@ -9366,18 +9865,18 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
        // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
+               idx := v.Args[0]
+               p := v.Args[1]
                w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
@@ -9389,14 +9888,14 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
                x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
                if x_2.AuxInt != 8 {
@@ -9418,7 +9917,7 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
        // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
@@ -9427,7 +9926,7 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                w0 := v.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               if w0.Op != OpS390XSRDconst {
                        break
                }
                j := w0.AuxInt
@@ -9449,7 +9948,7 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                        break
                }
                x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
                if x_2.AuxInt != j+8 {
@@ -9471,22 +9970,20 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9497,42 +9994,49 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9549,21 +10053,21 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if x_2.AuxInt != j+8 {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
@@ -9572,22 +10076,20 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9598,42 +10100,44 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
+               w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9650,5582 +10154,20470 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if x_2.AuxInt != 8 {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
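The MOVBstoreidx rules above, together with the commuted variants that follow, merge two adjacent byte stores into a single halfword store. A self-contained check of the big-endian byte layout they depend on (illustrative only, not compiler code):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Storing w>>8 at offset i-1 and w at offset i is the same as one
	// big-endian 16-bit store of w at i-1, which is what MOVHstoreidx does.
	w := uint16(0xABCD)
	var twoBytes, oneHalf [2]byte
	twoBytes[0] = byte(w >> 8) // byte store at i-1
	twoBytes[1] = byte(w)      // byte store at i
	binary.BigEndian.PutUint16(oneHalf[:], w)
	fmt.Println(twoBytes == oneHalf) // true
}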
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDEQ(v *Value) bool {
-       // match: (MOVDEQ x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDEQ x y cmp)
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
-               x := v.Args[0]
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDEQ(v *Value) bool {
+       // match: (MOVDEQ x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDEQ x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDEQ _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDEQ y _ (FlagLT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDEQ y _ (FlagGT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDGE(v *Value) bool {
+       // match: (MOVDGE x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDLE x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDLE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDGE _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDGE y _ (FlagLT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDGE _ x (FlagGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDGT(v *Value) bool {
+       // match: (MOVDGT x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDLT x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDLT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDGT y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDGT y _ (FlagLT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDGT _ x (FlagGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDLE(v *Value) bool {
+       // match: (MOVDLE x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDGE x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDLE _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDLE _ x (FlagLT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDLE y _ (FlagGT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDLT(v *Value) bool {
+       // match: (MOVDLT x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDGT x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDGT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDLT y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDLT _ x (FlagLT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDLT y _ (FlagGT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDNE(v *Value) bool {
+       // match: (MOVDNE x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDNE x y cmp)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
+               v.reset(OpS390XMOVDNE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDNE y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDNE _ x (FlagLT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDNE _ x (FlagGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDaddridx(v *Value) bool {
+       // match: (MOVDaddridx [c] {s} (ADDconst [d] x) y)
+       // cond: is20Bit(c+d) && x.Op != OpSB
+       // result: (MOVDaddridx [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is20Bit(c+d) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [c] {s} x (ADDconst [d] y))
+       // cond: is20Bit(c+d) && y.Op != OpSB
+       // result: (MOVDaddridx [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is20Bit(c+d) && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
                y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               y := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDload(v *Value) bool {
+       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVDload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVDloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDloadidx(v *Value) bool {
+       // match: (MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDnop(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVDnop <t> x)
+       // cond: t.Compare(x.Type) == CMPeq
+       // result: x
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(t.Compare(x.Type) == CMPeq) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDnop (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVDloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVDreg <t> x)
+       // cond: t.Compare(x.Type) == CMPeq
+       // result: x
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(t.Compare(x.Type) == CMPeq) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c
+               return true
+       }
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
+       for {
+               x := v.Args[0]
+               if !(x.Uses == 1) {
+                       break
+               }
+               v.reset(OpS390XMOVDnop)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVDloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool {
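+	// MOVDstore: fold constant offsets (ADDconst) and symbols (MOVDaddr) into the
+	// store, turn small constant stores into MOVDstoreconst, switch to the indexed
+	// form when the address is an ADD, and merge neighbouring stores into STMG
+	// store-multiple ops (see the cases at the end).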
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVDstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validValAndOff(c,off) && int64(int16(c)) == c && ptr.Op != OpSB
+       // result: (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVDstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
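+	// The next three cases chain neighbouring 8-byte stores into the
+	// STMG2/STMG3/STMG4 store-multiple ops, extending an existing group by one
+	// register per match while the starting offset still fits in 20 bits.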
+       // match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-8)   && clobber(x)
+       // result: (STMG2 [i-8] {s} p w0 w1 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w1 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVDstore {
+                       break
+               }
+               if x.AuxInt != i-8 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG2)
+               v.AuxInt = i - 8
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
+       // cond: x.Uses == 1   && is20Bit(i-16)   && clobber(x)
+       // result: (STMG3 [i-16] {s} p w0 w1 w2 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w2 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTMG2 {
+                       break
+               }
+               if x.AuxInt != i-16 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               mem := x.Args[3]
+               if !(x.Uses == 1 && is20Bit(i-16) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG3)
+               v.AuxInt = i - 16
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
+       // cond: x.Uses == 1   && is20Bit(i-24)   && clobber(x)
+       // result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w3 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTMG3 {
+                       break
+               }
+               if x.AuxInt != i-24 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               w2 := x.Args[3]
+               mem := x.Args[4]
+               if !(x.Uses == 1 && is20Bit(i-24) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG4)
+               v.AuxInt = i - 24
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(w3)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool {
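+	// MOVDstoreconst: fold an ADDconst or MOVDaddr base into the ValAndOff-encoded
+	// offset, provided the combined offset still fits.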
+       // match: (MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDstoreidx(v *Value) bool {
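+	// The four cases below are the two ADDconst offset-folding rules matched in
+	// both orderings of the commuting pointer and index arguments.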
+       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHBRstore(v *Value) bool {
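+	// MOVHBRstore: combine two adjacent halfword byte-reversed stores whose
+	// sources are shifts (SRDconst/SRWconst) of the same value 16 bits apart
+	// into a single word-sized MOVWBRstore.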
+       // match: (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHBRstoreidx(v *Value) bool {
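+	// Indexed form of the MOVHBRstore combining above. The pointer and index
+	// arguments commute, so each source rule appears in every argument ordering,
+	// giving the sixteen cases below.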
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHZload(v *Value) bool {
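+	// The first case forwards a halfword that was just stored to the same address
+	// straight to the load (via MOVHZreg); the remaining cases fold constant
+	// offsets and symbols into the load or switch to the indexed form.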
+       // match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVHZreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHZload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVHZloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
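Each for-block in the generated matcher above corresponds to one source rule; for instance, the ADDconst offset-folding case appears to come from a rule of roughly this shape in S390X.rules:

(MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)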
+func rewriteValueS390X_OpS390XMOVHZloadidx(v *Value) bool {
+       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
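The four MOVHZloadidx cases differ only in which argument carries the ADDconst and in the ptr/idx order, so they would be the expansions of just two source rules, roughly:

(MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
(MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)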
+func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHZreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZreg x:(MOVHZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHZreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(uint16(c))
+               return true
+       }
+       // match: (MOVHZreg x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
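The last two MOVHZreg cases fold the zero-extension into the load itself by re-issuing the load with v's wider type. The @x.Block prefix in a result places the new value in the block of the matched load x rather than in v's block (hence the b = x.Block in the generated code); the non-indexed case, as spelled out in its comments, is roughly:

(MOVHZreg x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)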
+func rewriteValueS390X_OpS390XMOVHload(v *Value) bool {
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int16(c))
+               return true
+       }
+       // match: (MOVHreg x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVHZreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validOff(off) && ptr.Op != OpSB
+       // result: (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVHstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != 16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != j+16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_1.AuxInt != 16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_1.AuxInt != j+16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
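The last four MOVHstore cases merge two adjacent halfword stores of the same value into one MOVWstore: the store at the lower offset [i-2] holds the upper 16 bits of w, identified by the (SRDconst [16] w) or (SRWconst [16] w) operand (or [j+16] when the value stored at [i] is itself a shift), which matches the big-endian layout of the combined word. The simplest of them is roughly:

(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem)) && p.Op != OpSB && x.Uses == 1 && clobber(x) -> (MOVWstore [i-2] {s} p w mem)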
+func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
+       // match: (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpS390XMOVHstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(c).Val()&0xffff|ValAndOff(a).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
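Similarly, the last MOVHstoreconst case fuses two constant halfword stores at adjacent offsets into one MOVWstoreconst; since S390X is big-endian, the constant stored at the lower offset a ends up in the upper 16 bits of the combined value, which is why the result packs ValAndOff(a).Val()<<16 with ValAndOff(c).Val()&0xffff.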
+func rewriteValueS390X_OpS390XMOVHstoreidx(v *Value) bool {
+       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
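The MOVHstoreidx block repeats the same pair-merging rules in indexed form. Each of the four underlying patterns (plain w versus w0:(SRDconst [j] w) or w0:(SRWconst [j] w)) appears in four generated variants that differ only in whether ptr and idx are swapped in the outer and/or the inner store, presumably the automatic expansion of the commutative index arguments rather than sixteen hand-written rules.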
+func rewriteValueS390X_OpS390XMOVWBRstore(v *Value) bool {
+       // match: (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWBRstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWBRstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWBRstoreidx(v *Value) bool {
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} idx p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} idx p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZload(v *Value) bool {
+       // match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWZreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWZload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWZloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZloadidx(v *Value) bool {
+       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWZreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVHZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(uint32(c))
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWload(v *Value) bool {
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(c))
+               return true
+       }
+       // match: (MOVWreg x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWZreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB
+       // result: (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVDstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVDstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != j+32 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-4)   && clobber(x)
+       // result: (STM2 [i-4] {s} p w0 w1 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w1 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-4) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM2)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
+       // cond: x.Uses == 1   && is20Bit(i-8)   && clobber(x)
+       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w2 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTM2 {
+                       break
+               }
+               if x.AuxInt != i-8 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               mem := x.Args[3]
+               if !(x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM3)
+               v.AuxInt = i - 8
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
+       // cond: x.Uses == 1   && is20Bit(i-12)   && clobber(x)
+       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w3 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTM3 {
+                       break
+               }
+               if x.AuxInt != i-12 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               w2 := x.Args[3]
+               mem := x.Args[4]
+               if !(x.Uses == 1 && is20Bit(i-12) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM4)
+               v.AuxInt = i - 12
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(w3)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpS390XMOVWstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
+               v0.AuxInt = ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstoreidx(v *Value) bool {
+       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
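
The MOVWstoreidx cases above fold a pair of adjacent 32-bit indexed stores, where the older store writes the high word via (SRDconst [32] w), into a single MOVDstoreidx. A minimal sketch of the underlying identity on a big-endian target such as s390x (plain Go; the buffer and names are illustrative, not part of the generated file):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	const w = uint64(0x1122334455667788)
	// Two 32-bit big-endian stores: w>>32 at offset i-4, then w at offset i.
	two := make([]byte, 8)
	binary.BigEndian.PutUint32(two[0:4], uint32(w>>32))
	binary.BigEndian.PutUint32(two[4:8], uint32(w))
	// One 64-bit big-endian store of w at offset i-4.
	one := make([]byte, 8)
	binary.BigEndian.PutUint64(one, w)
	fmt.Println(bytes.Equal(two, one)) // true: the merged store writes the same bytes
}
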
+func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
+       // match: (MULLD x (MOVDconst [c]))
+       // cond: is32Bit(c)
+       // result: (MULLDconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XMULLDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLD (MOVDconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (MULLDconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XMULLDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLDconst [-1] x)
+       // cond:
+       // result: (NEG x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpS390XNEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [0] _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLDconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLDconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpS390XSLDconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpS390XADD)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
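
The MULLDconst strength-reduction cases depend on three arithmetic identities: x*2^k == x<<k, x*(2^k-1) == (x<<k)-x, and x*(2^k+1) == (x<<k)+x; the c >= 15 and c >= 17 conditions simply restrict the add/sub forms to the larger constants. A quick stand-alone check of the identities in plain Go (illustrative only, not part of the commit):

package main

import "fmt"

func main() {
	x := int64(123456789)
	fmt.Println(x*8 == x<<3)      // c == 2^k     -> SLDconst [log2(c)]
	fmt.Println(x*15 == (x<<4)-x) // c == 2^k - 1 -> SUB (SLDconst [log2(c+1)] x) x
	fmt.Println(x*17 == (x<<4)+x) // c == 2^k + 1 -> ADD (SLDconst [log2(c-1)] x) x
}
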
+func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
+       // match: (MULLW x (MOVDconst [c]))
+       // cond:
+       // result: (MULLWconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMULLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLW (MOVDconst [c]) x)
+       // cond:
+       // result: (MULLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpS390XMULLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLWconst [-1] x)
+       // cond:
+       // result: (NEGW x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpS390XNEGW)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [0] _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLWconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLWconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpS390XSLWconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpS390XSUBW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpS390XADDW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c*d))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNEG(v *Value) bool {
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       // match: (NEG (ADDconst [c] (NEG x)))
+       // cond: c != -(1<<31)
+       // result: (ADDconst [-c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpS390XNEG {
+                       break
+               }
+               x := v_0_0.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpS390XADDconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNEGW(v *Value) bool {
+       // match: (NEGW (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNOT(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (NOT x)
+       // cond: true
+       // result: (XOR (MOVDconst [-1]) x)
+       for {
+               x := v.Args[0]
+               if !(true) {
+                       break
+               }
+               v.reset(OpS390XXOR)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
+               v0.AuxInt = -1
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNOTW(v *Value) bool {
+       // match: (NOTW x)
+       // cond: true
+       // result: (XORWconst [-1] x)
+       for {
+               x := v.Args[0]
+               if !(true) {
+                       break
+               }
+               v.reset(OpS390XXORWconst)
+               v.AuxInt = -1
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XOR(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (OR x (MOVDconst [c]))
+       // cond: isU32Bit(c)
+       // result: (ORconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isU32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) x)
+       // cond: isU32Bit(c)
+       // result: (ORconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isU32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSLDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpS390XRLLGconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSRDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSLDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpS390XRLLGconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
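
The two cases just above match the rotate-by-constant idiom in both argument orders: (x<<c) | (x>>(64-c)) is a left rotate by c, which RLLGconst performs in one instruction. A minimal sketch of the identity (illustrative, not from the generated file):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := uint64(0x0123456789abcdef)
	c := 13
	fmt.Println((x<<uint(c))|(x>>uint(64-c)) == bits.RotateLeft64(x, c)) // true
}
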
+       // match: (OR (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (OR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (OR x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZload [i1] {s} p mem) sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)) x1:(MOVBZload [i1] {s} p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZload [i1] {s} p mem) sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZload [i1] {s} p mem) sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)) x1:(MOVWZload [i1] {s} p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
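
The pairwise merges above combine two adjacent zero-extended loads into one wider load: on big-endian s390x the byte at the lower address i0 supplies the high bits, so x1 | (x0<<8) with i1 == i0+1 reads exactly the 16-bit value at i0, and the halfword and word cases scale the same way up to MOVDload. A small sketch of the byte case (the buffer and names are illustrative, not part of the generated file):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	mem := []byte{0xAA, 0xBB} // bytes at i0 and i0+1
	x0 := uint16(mem[0])      // MOVBZload [i0]
	x1 := uint16(mem[1])      // MOVBZload [i1], i1 == i0+1
	fmt.Println((x1 | x0<<8) == binary.BigEndian.Uint16(mem)) // true
}
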
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) y) s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
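Each of the orderings above encodes the same merge: two zero-extended byte loads at adjacent offsets, each shifted into position inside an OR chain, collapse into one shifted zero-extended halfword load. Below is a minimal sketch of the arithmetic identity behind the rewrite, assuming a big-endian layout (as on s390x) and hypothetical values for j0, j1, y and the loaded bytes; it is an illustration, not compiler code.

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	func main() {
		// Hypothetical inputs: buf models the two bytes at offsets i0 and i0+1,
		// y stands for the rest of the OR chain, and j1 = j0-8 with j1%16 == 0,
		// mirroring the rule condition.
		buf := []byte{0x12, 0x34}
		y := uint64(0xABCD)
		j0, j1 := uint(24), uint(16)

		b0 := uint64(buf[0]) // x0: MOVBZload [i0]
		b1 := uint64(buf[1]) // x1: MOVBZload [i0+1]

		// Matched shape: (OR (SLDconst [j0] x0) (OR y (SLDconst [j1] x1)))
		lhs := b0<<j0 | (y | b1<<j1)

		// Result shape: (OR (SLDconst [j1] (MOVHZload [i0])) y)
		hz := uint64(binary.BigEndian.Uint16(buf)) // big-endian halfword at i0
		rhs := hz<<j1 | y

		fmt.Println(lhs == rhs) // true: both equal 0x1234ABCD
	}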
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem)) y) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
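The indexed-addressing rules above match the unshifted pairing: a zero-extended byte load OR'd with its lower-offset neighbour shifted left by 8 becomes a single zero-extended halfword load at the lower offset. The same identity, sketched with hypothetical data (big-endian, so the byte at the lower offset supplies the high half):

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	func main() {
		buf := []byte{0x56, 0x78}     // bytes at offsets i0 and i0+1
		x1 := uint64(buf[1])          // x1: MOVBZloadidx [i1], i1 == i0+1
		sh := uint64(buf[0]) << 8     // sh: SLDconst [8] x0, x0: MOVBZloadidx [i0]
		merged := uint64(binary.BigEndian.Uint16(buf)) // MOVHZloadidx [i0]
		fmt.Println(x1|sh == merged)  // true: 0x5678
	}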
+       // match: (OR x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
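The halfword rules repeat the pattern one size up: two adjacent zero-extended halfword loads, the lower-offset one shifted left by 16, merge into one zero-extended word load. A hedged sketch with hypothetical data:

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	func main() {
		buf := []byte{0x12, 0x34, 0x56, 0x78}                // bytes at offsets i0..i0+3
		x1 := uint64(binary.BigEndian.Uint16(buf[2:]))       // x1: MOVHZloadidx [i1], i1 == i0+2
		sh := uint64(binary.BigEndian.Uint16(buf[:2])) << 16 // sh: SLDconst [16] x0, x0: MOVHZloadidx [i0]
		merged := uint64(binary.BigEndian.Uint32(buf))       // MOVWZloadidx [i0]
		fmt.Println(x1|sh == merged)                         // true: 0x12345678
	}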
+       // match: (OR x1:(MOVWZloadidx [i1] {s} p idx mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} p idx mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)) x1:(MOVWZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)) x1:(MOVWZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)) x1:(MOVWZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)) x1:(MOVWZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR x0:(MOVBZload [i0] {s} p mem) sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)) x0:(MOVBZload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))) r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) y) s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))) s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDEQ)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ _ x (FlagEQ))
-       // cond:
-       // result: x
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
                        break
                }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ y _ (FlagGT))
-       // cond:
-       // result: y
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDGE(v *Value) bool {
-       // match: (MOVDGE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDLE x y cmp)
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDLE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDGE _ x (FlagEQ))
-       // cond:
-       // result: x
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDGE y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGE _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
                        break
                }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDGT(v *Value) bool {
-       // match: (MOVDGT x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDLT x y cmp)
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDLT)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDGT y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGT y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGT _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDLE(v *Value) bool {
-       // match: (MOVDLE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDGE x y cmp)
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDGE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDLE _ x (FlagEQ))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDLE _ x (FlagLT))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDLE y _ (FlagGT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDLT(v *Value) bool {
-       // match: (MOVDLT x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDGT x y cmp)
-       for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDGT)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDLT y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDLT _ x (FlagLT))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDLT y _ (FlagGT))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDNE(v *Value) bool {
-       // match: (MOVDNE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDNE x y cmp)
-       for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDNE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDNE y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDNE _ x (FlagLT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
                        break
                }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDNE _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDaddridx(v *Value) bool {
-       // match: (MOVDaddridx [c] {s} (ADDconst [d] x) y)
-       // cond: is20Bit(c+d) && x.Op != OpSB
-       // result: (MOVDaddridx [c+d] {s} x y)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is20Bit(c+d) && x.Op != OpSB) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDaddridx [c] {s} x (ADDconst [d] y))
-       // cond: is20Bit(c+d) && y.Op != OpSB
-       // result: (MOVDaddridx [c+d] {s} x y)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is20Bit(c+d) && y.Op != OpSB) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if idx != x1.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
-       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDaddr {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDload(v *Value) bool {
-       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDreg x)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVDload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               if mem != x1.Args[2] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVDloadidx [off] {sym} ptr idx mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDloadidx(v *Value) bool {
-       // match: (MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDnop(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVDnop <t> x)
-       // cond: t.Compare(x.Type) == CMPeq
-       // result: x
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if !(t.Compare(x.Type) == CMPeq) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDnop (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               if idx != x1.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVBload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if idx != x1.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVWload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVDload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZloadidx {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               if mem != x1.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
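Before the halfword-combining variants below, it may help to see the shape of Go source these indexed-load matchers are aimed at: manual little-endian decoding written with explicit index arithmetic. Whether the compiler fires exactly these rules on this particular function is not asserted here; the snippet only illustrates the kind of expression that can lower to ORs of shifted MOVBZloadidx values.

package main

import "fmt"

// le32 decodes a 32-bit little-endian value with explicit indexed byte loads,
// producing the OR-of-shifted-loads pattern the matchers above look for.
func le32(b []byte, i int) uint32 {
	return uint32(b[i]) | uint32(b[i+1])<<8 | uint32(b[i+2])<<16 | uint32(b[i+3])<<24
}

func main() {
	fmt.Printf("%#x\n", le32([]byte{0x01, 0x02, 0x03, 0x04}, 0)) // 0x4030201
}
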
-       // match: (MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVDloadidx  [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDloadidx {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVDreg <t> x)
-       // cond: t.Compare(x.Type) == CMPeq
-       // result: x
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if !(t.Compare(x.Type) == CMPeq) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
-       for {
-               x := v.Args[0]
-               if !(x.Uses == 1) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XMOVDnop)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               if idx != x0.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
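The halfword-to-word variants use the analogous conditions i1 == i0+2, j1 == j0+16, j0 % 32 == 0: two adjacent byte-reversed halfword loads occupying one aligned word lane collapse into a single byte-reversed word load. A standalone sketch of that identity (ordinary Go, not compiler code; mem and j0 are placeholders):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	mem := []byte{0x11, 0x22, 0x33, 0x44} // the four bytes at offsets i0..i0+3
	const j0 = 32                         // any multiple of 32, per the j0 % 32 == 0 condition

	// Left-hand side: two byte-reversed (little-endian) halfword loads at i0 and
	// i0+2, zero-extended and shifted by j0 and j0+16, then OR'd.
	lhs := uint64(binary.LittleEndian.Uint16(mem[0:2]))<<j0 |
		uint64(binary.LittleEndian.Uint16(mem[2:4]))<<(j0+16)

	// Right-hand side: one byte-reversed word load at i0 (a little-endian 32-bit
	// read), zero-extended and shifted by j0.
	rhs := uint64(binary.LittleEndian.Uint32(mem)) << j0

	fmt.Printf("%t %#x\n", lhs == rhs, lhs) // true 0x4433221100000000
}
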
-       // match: (MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDreg <t> x:(MOVHload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x0.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDreg <t> x:(MOVWload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVDload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDload {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZloadidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               if idx != x0.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVDloadidx  [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDloadidx {
+               if mem != x0.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool {
-       // match: (MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVDstore  [off1+off2] {sym} ptr val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validValAndOff(c,off) && int64(int16(c)) == c && ptr.Op != OpSB
-       // result: (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVDstoreidx [off] {sym} ptr idx val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-8)   && clobber(x)
-       // result: (STMG2 [i-8] {s} p w0 w1 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w1 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVDstore {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.AuxInt != i-8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               if x.Aux != s {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[1]
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XSTMG2)
-               v.AuxInt = i - 8
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
-       // cond: x.Uses == 1   && is20Bit(i-16)   && clobber(x)
-       // result: (STMG3 [i-16] {s} p w0 w1 w2 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w2 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTMG2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x.AuxInt != i-16 {
+               if p != x0.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               mem := x.Args[3]
-               if !(x.Uses == 1 && is20Bit(i-16) && clobber(x)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTMG3)
-               v.AuxInt = i - 16
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
-       // cond: x.Uses == 1   && is20Bit(i-24)   && clobber(x)
-       // result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w3 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTMG3 {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-24 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               w2 := x.Args[3]
-               mem := x.Args[4]
-               if !(x.Uses == 1 && is20Bit(i-24) && clobber(x)) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XSTMG4)
-               v.AuxInt = i - 24
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(w3)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool {
-       // match: (MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstoreidx(v *Value) bool {
-       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHBRstore(v *Value) bool {
-       // match: (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_1.AuxInt != 16 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_1.AuxInt != 16 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRWconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHBRstoreidx(v *Value) bool {
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_2.AuxInt != 16 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if idx != x.Args[1] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_2.AuxInt != 16 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if idx != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != x.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if idx != x.Args[1] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        return false
 }
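The long run of OR cases above are all instances of a single load-combining pattern: they differ only in the order of the operands of the outer and inner OR and in the p/idx order inside each MOVHBRloadidx, and every case checks the same condition and builds the same merged MOVWBRloadidx result. Below is a minimal, self-contained sketch of how such orderings can be enumerated from one pattern; the node type, the commutative set, and the variants helper are illustrative assumptions, not the generator's actual code.

package main

import "fmt"

// node is a stand-in for a rewrite-rule pattern: an op name plus arguments.
type node struct {
	op   string
	args []node
}

// commutative marks ops whose two arguments may appear in either order
// (a stand-in for the real op table).
var commutative = map[string]bool{"OR": true}

// variants returns every argument ordering of the pattern n obtained by
// optionally swapping the two arguments of each commutative op, recursively,
// so a pattern with k two-argument commutative ops yields 2^k orderings.
func variants(n node) []node {
	if len(n.args) == 0 {
		return []node{n}
	}
	perArg := make([][]node, len(n.args))
	for i, a := range n.args {
		perArg[i] = variants(a)
	}
	var out []node
	var build func(i int, cur []node)
	build = func(i int, cur []node) {
		if i == len(perArg) {
			args := append([]node(nil), cur...)
			out = append(out, node{op: n.op, args: args})
			if commutative[n.op] && len(args) == 2 {
				out = append(out, node{op: n.op, args: []node{args[1], args[0]}})
			}
			return
		}
		for _, v := range perArg[i] {
			build(i+1, append(cur, v))
		}
	}
	build(0, nil)
	return out
}

func main() {
	// (OR s1 (OR y s0)): two commutative ops, so 2*2 = 4 orderings.
	p := node{op: "OR", args: []node{
		{op: "s1"},
		{op: "OR", args: []node{{op: "y"}, {op: "s0"}}},
	}}
	fmt.Println(len(variants(p))) // 4
}

In the pattern matched above, the outer OR, the inner OR, and the index operands of the two loads each contribute a factor of two, which is why one source-level rule fans out into the many near-identical cases seen here.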
-func rewriteValueS390X_OpS390XMOVHZload(v *Value) bool {
-       // match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVHZreg x)
+func rewriteValueS390X_OpS390XORW(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORW x (MOVDconst [c]))
+       // cond:
+       // result: (ORWconst [c] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHstore {
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (MOVDconst [c]) x)
+       // cond:
+       // result: (ORWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpS390XORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSLWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpS390XRLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSLWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if !(d == 32-c) {
                        break
                }
-               v.reset(OpS390XMOVHZreg)
+               v.reset(OpS390XRLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHZload [off1+off2] {sym} ptr mem)
+       // match: (ORW x x)
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               v.reset(OpS390XMOVHZload)
-               v.AuxInt = off1 + off2
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVHZloadidx [off] {sym} ptr idx mem)
+       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZloadidx)
+               v.reset(OpS390XORWload)
+               v.Type = t
                v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHZloadidx(v *Value) bool {
-       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = c + d
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = c + d
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHZreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVDreg)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
+               t := v.Type
                x := v.Args[0]
-               if x.Op != OpArg {
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVDreg)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW x1:(MOVBZload [i1] {s} p mem) sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZreg {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(uint16(c))
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, v.Type)
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)) x1:(MOVBZload [i1] {s} p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if sh.AuxInt != 8 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHload(v *Value) bool {
-       // match: (MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHload  [off1+off2] {sym} ptr mem)
+       // match: (ORW x1:(MOVHZload [i1] {s} p mem) sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZload {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if sh.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
                        break
                }
-               v.reset(OpS390XMOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               if mem != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBreg {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int16(c))
-               return true
-       }
-       // match: (MOVHreg x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, v.Type)
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVHZreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHZreg {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validOff(off) && ptr.Op != OpSB
-       // result: (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off) && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) y) s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVHstoreidx [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))) s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-2 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x_1.AuxInt != 16 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if w != x_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if x_1.AuxInt != j+16 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != x_1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.AuxInt != i-2 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               if sh.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x_1.AuxInt != 16 {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRWconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               if sh.AuxInt != 8 {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x_1.AuxInt != j+16 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
-       // match: (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpS390XMOVHstoreconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               mem := x.Args[1]
-               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(c).Val()&0xffff|ValAndOff(a).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstoreidx(v *Value) bool {
-       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != 16 {
+               if idx != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+16 {
+               if idx != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != 16 {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+16 {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
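The four cases above are identical except for which ORW argument is matched first and for the p/idx order inside each MOVBZloadidx; all of them merge two adjacent zero-extended byte loads into a single MOVHZloadidx. A minimal Go sketch of the arithmetic identity this relies on, on big-endian s390x, with invented byte values (b0 and b1 stand for the bytes at i0 and i0+1):

package main

import "fmt"

// On big-endian s390x, OR-ing the byte at i0 shifted left by 8 with the byte
// at i0+1 yields the same value as one zero-extended 16-bit load at i0.
// The byte values below are invented for illustration.
func main() {
	b0, b1 := uint32(0xAB), uint32(0xCD)
	pair := b0<<8 | b1            // (ORW (SLWconst [8] x0) x1)
	halfword := uint32(0xABCD)    // what MOVHZloadidx [i0] would produce
	fmt.Println(pair == halfword) // prints true
}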
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWBRstore(v *Value) bool {
-       // match: (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if v_1.AuxInt != 32 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWBRstore {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-4 {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w0 mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWBRstore {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWBRstoreidx(v *Value) bool {
-       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if v_2.AuxInt != 32 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWBRstoreidx {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-4 {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWBRstoreidx {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.AuxInt != i-4 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               if sh.AuxInt != 16 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if p != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZload(v *Value) bool {
-       // match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWZreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWstore {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVWZreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWZload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWZload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWZload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               if idx != x1.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWZloadidx [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZloadidx(v *Value) bool {
-       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWZreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVHZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZreg {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZreg {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if idx != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(uint32(c))
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if p != x1.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, v.Type)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWload(v *Value) bool {
-       // match: (MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
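This chained form handles the same byte-pair merge when the pair sits inside a longer ORW expression: the two byte loads carry shifts j0 and j1 with j1 == j0-8 and j1 a multiple of 16, so the pair collapses to one halfword load shifted by j1 and OR-ed back into the rest of the chain y. A minimal sketch of that identity, with invented values for the bytes, the shift, and y:

package main

import "fmt"

// b0 and b1 stand for the bytes at i0 and i0+1, y for the rest of the ORW
// chain; j1 is a multiple of 16 and j0 = j1+8. All values are invented.
func main() {
	b0, b1 := uint32(0x12), uint32(0x34)
	y := uint32(0xFF)
	j1 := uint32(16)
	j0 := j1 + 8

	before := b0<<j0 | (b1<<j1 | y) // s0 | (ORW s1 y)
	after := (b0<<8|b1)<<j1 | y     // (ORW (SLWconst [j1] (MOVHZloadidx [i0])) y)

	fmt.Printf("%#x %#x\n", before, after) // both print 0x123400ff
}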
-       // match: (MOVWreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBreg {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWreg {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if idx != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(c))
-               return true
-       }
-       // match: (MOVWreg x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x1.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWZreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWZreg {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB
-       // result: (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if idx != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-4 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != x.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w0 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.AuxInt != i-4 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.Aux != s {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x_1.AuxInt != j+32 {
+               if p != x1.Args[0] {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-4)   && clobber(x)
-       // result: (STM2 [i-4] {s} p w0 w1 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w1 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x.Args[0] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               w0 := x.Args[1]
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-4) && clobber(x)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XSTM2)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
-       // cond: x.Uses == 1   && is20Bit(i-8)   && clobber(x)
-       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w2 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTM2 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x.AuxInt != i-8 {
+               if idx != x1.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if p != x1.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               mem := x.Args[3]
-               if !(x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTM3)
-               v.AuxInt = i - 8
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
-       // cond: x.Uses == 1   && is20Bit(i-12)   && clobber(x)
-       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w3 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTM3 {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-12 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x.Args[0] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               w2 := x.Args[3]
-               mem := x.Args[4]
-               if !(x.Uses == 1 && is20Bit(i-12) && clobber(x)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTM4)
-               v.AuxInt = i - 12
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(w3)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpS390XMOVWstoreconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               mem := x.Args[1]
-               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
-               v0.AuxInt = ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstoreidx(v *Value) bool {
-       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWstoreidx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-4 {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x_2.AuxInt != 32 {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x_2.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWstoreidx {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+32 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
-       // match: (MULLD x (MOVDconst [c]))
-       // cond: is32Bit(c)
-       // result: (MULLDconst [c] x)
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XMULLDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLD (MOVDconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (MULLDconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMULLDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLDload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVDload {
+               if mem != x0.Args[2] {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMULLDload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVDload {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMULLDload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLDconst [-1] x)
-       // cond:
-       // result: (NEG x)
-       for {
-               if v.AuxInt != -1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpS390XNEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [0] _)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               if v.AuxInt != 0 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLDconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLDconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XSLDconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XSUB)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XADD)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c*d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c * d
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
-       // match: (MULLW x (MOVDconst [c]))
-       // cond:
-       // result: (MULLWconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpS390XMULLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLW (MOVDconst [c]) x)
-       // cond:
-       // result: (MULLWconst [c] x)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpS390XMULLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWload {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWZload {
+               if p != x0.Args[1] {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLWconst [-1] x)
-       // cond:
-       // result: (NEGW x)
-       for {
-               if v.AuxInt != -1 {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpS390XNEGW)
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLWconst [0] _)
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               if v.AuxInt != 0 {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLWconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLWconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XSLWconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XSUBW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XADDW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLWconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c*d))])
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(c * d))
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNEG(v *Value) bool {
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = -c
-               return true
-       }
-       // match: (NEG (ADDconst [c] (NEG x)))
-       // cond: c != -(1<<31)
-       // result: (ADDconst [-c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_0.AuxInt
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpS390XNEG {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_0_0.Args[0]
-               if !(c != -(1 << 31)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XADDconst)
-               v.AuxInt = -c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNEGW(v *Value) bool {
-       // match: (NEGW (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(-c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(-c))
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNOT(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (NOT x)
-       // cond: true
-       // result: (XOR (MOVDconst [-1]) x)
-       for {
-               x := v.Args[0]
-               if !(true) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XXOR)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
-               v0.AuxInt = -1
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNOTW(v *Value) bool {
-       // match: (NOTW x)
-       // cond: true
-       // result: (XORWconst [-1] x)
-       for {
-               x := v.Args[0]
-               if !(true) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XXORWconst)
-               v.AuxInt = -1
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XOR(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (OR x (MOVDconst [c]))
-       // cond: isU32Bit(c)
-       // result: (ORconst [c] x)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW x0:(MOVBZload [i0] {s} p mem) sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_1.AuxInt
-               if !(isU32Bit(c)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (MOVDconst [c]) x)
-       // cond: isU32Bit(c)
-       // result: (ORconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isU32Bit(c)) {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               v.reset(OpS390XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( OR (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               if mem != x1.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XRLLGconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: ( OR (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)) x0:(MOVBZload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSRDconst {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSLDconst {
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x != v_1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c|d])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               if mem != x0.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c | d
-               return true
-       }
-       // match: (OR x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORload <t> [off] {sym} x ptr mem)
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
        for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVDload {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
                        break
                }
-               v.reset(OpS390XORload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVDload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XORload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (OR  x:(SLDconst _) y)
-       // cond: y.Op != OpS390XSLDconst
-       // result: (OR  y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XSLDconst {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpS390XSLDconst) {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
                        break
                }
-               v.reset(OpS390XOR)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZload [i]   {s} p mem)     s0:(SLDconst [8]  x1:(MOVBZload [i+1] {s} p mem)))     s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem)))     s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem)))     s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem)))     s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem)))     s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem)))     s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
-       for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
+               if p != x1.Args[0] {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
+               if mem != x1.Args[1] {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
+               if sh.AuxInt != 16 {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
                        break
                }
-               if x1.AuxInt != i+1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if s1.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x2.AuxInt != i+2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[1] {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x3.AuxInt != i+3 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x3.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x3.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               if mem != x0.Args[1] {
                        break
                }
-               if s3.AuxInt != 32 {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) y) s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x4.AuxInt != i+4 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x4.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x4.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x4.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s4.AuxInt != 40 {
+               if p != x1.Args[0] {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZload {
+               if mem != x1.Args[1] {
                        break
                }
-               if x5.AuxInt != i+5 {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x5.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))) s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x5.Args[0] {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x5.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s5.AuxInt != 48 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x6.AuxInt != i+6 {
+               if p != x1.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               if mem != x1.Args[1] {
                        break
                }
-               if p != x6.Args[0] {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x6.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZload {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLDconst [8]  x1:(MOVBZloadidx [i+1] {s} p idx mem)))     s1:(SLDconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))     s2:(SLDconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))     s3:(SLDconst [32] x4:(MOVBZloadidx [i+4] {s} p idx mem)))     s4:(SLDconst [40] x5:(MOVBZloadidx [i+5] {s} p idx mem)))     s5:(SLDconst [48] x6:(MOVBZloadidx [i+6] {s} p idx mem)))     s6:(SLDconst [56] x7:(MOVBZloadidx [i+7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRloadidx <v.Type> [i] {s} p idx mem)
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
-                       break
-               }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
-                       break
-               }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
-                       break
-               }
-               x0 := o5.Args[0]
+               x0 := v.Args[0]
                if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
+               idx := x0.Args[0]
+               p := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -15238,1059 +30630,1381 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
-                       break
-               }
-               if s1.AuxInt != 16 {
-                       break
-               }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
-                       break
-               }
-               if x2.AuxInt != i+2 {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if x2.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x2.Args[1] {
+               if sh.AuxInt != 8 {
                        break
                }
-               if mem != x2.Args[2] {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s2.AuxInt != 24 {
+               if idx != x1.Args[0] {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZloadidx {
+               if p != x1.Args[1] {
                        break
                }
-               if x3.AuxInt != i+3 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x3.Aux != s {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if p != x3.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x3.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x3.Args[2] {
+               if sh.AuxInt != 8 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s3.AuxInt != 32 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZloadidx {
+               if idx != x1.Args[0] {
                        break
                }
-               if x4.AuxInt != i+4 {
+               if p != x1.Args[1] {
                        break
                }
-               if x4.Aux != s {
+               if mem != x1.Args[2] {
                        break
                }
-               if p != x4.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if idx != x4.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x4.Args[2] {
+               if sh.AuxInt != 8 {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s4.AuxInt != 40 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x5.AuxInt != i+5 {
+               if p != x0.Args[0] {
                        break
                }
-               if x5.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x5.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x5.Args[1] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x5.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               if s5.AuxInt != 48 {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZloadidx {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x6.AuxInt != i+6 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x6.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x6.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if idx != x6.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if mem != x6.Args[2] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZloadidx {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZload [i]   {s} p mem)     s0:(SLDconst [8]  x1:(MOVBZload [i-1] {s} p mem)))     s1:(SLDconst [16] x2:(MOVBZload [i-2] {s} p mem)))     s2:(SLDconst [24] x3:(MOVBZload [i-3] {s} p mem)))     s3:(SLDconst [32] x4:(MOVBZload [i-4] {s} p mem)))     s4:(SLDconst [40] x5:(MOVBZload [i-5] {s} p mem)))     s5:(SLDconst [48] x6:(MOVBZload [i-6] {s} p mem)))     s6:(SLDconst [56] x7:(MOVBZload [i-7] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload [i-7] {s} p mem)
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               if sh.AuxInt != 8 {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
-                       break
-               }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               if idx != x0.Args[0] {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if p != x0.Args[1] {
                        break
                }
-               if x1.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x1.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x1.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
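+	// The next group combines two MOVHZreg-wrapped MOVHBRloadidx halfword
+	// loads at offsets i0 and i0+2, the higher one shifted left by 16, into
+	// one byte-reversed word load (MOVWBRloadidx). The four cases cover the
+	// possible p/idx argument orders of the two matched loads.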
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if s1.AuxInt != 16 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x2.AuxInt != i-2 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x2.Aux != s {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x2.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if s2.AuxInt != 24 {
+               if mem != x1.Args[2] {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZload {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if x3.AuxInt != i-3 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x3.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x3.Args[1] {
+               if sh.AuxInt != 16 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if s3.AuxInt != 32 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x4.AuxInt != i-4 {
+               if p != x1.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               if idx != x1.Args[1] {
                        break
                }
-               if p != x4.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               if mem != x4.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if s4.AuxInt != 40 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x5.AuxInt != i-5 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x5.Aux != s {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x5.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if mem != x5.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if s5.AuxInt != 48 {
+               if p != x1.Args[1] {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZload {
+               if mem != x1.Args[2] {
                        break
                }
-               if x6.AuxInt != i-6 {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if x6.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x6.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if mem != x6.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               if s6.AuxInt != 56 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZload {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x7.AuxInt != i-7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 7
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
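	// The next four cases mirror the group above: the shifted
	// MOVHZreg/MOVHBRloadidx pair appears as the first ORW operand instead of
	// the second, and the result is the same MOVWBRloadidx.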
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLDconst [8]  x1:(MOVBZloadidx [i-1] {s} p idx mem)))     s1:(SLDconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))     s2:(SLDconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))     s3:(SLDconst [32] x4:(MOVBZloadidx [i-4] {s} p idx mem)))     s4:(SLDconst [40] x5:(MOVBZloadidx [i-5] {s} p idx mem)))     s5:(SLDconst [48] x6:(MOVBZloadidx [i-6] {s} p idx mem)))     s6:(SLDconst [56] x7:(MOVBZloadidx [i-7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <v.Type> [i-7] {s} p idx mem)
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
+               if sh.AuxInt != 16 {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s0.AuxInt != 8 {
+               if p != x0.Args[0] {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               if idx != x0.Args[1] {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x1.Aux != s {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if p != x1.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x1.Args[1] {
+               if sh.AuxInt != 16 {
                        break
                }
-               if mem != x1.Args[2] {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if s1.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.AuxInt != i-2 {
+               if p != x0.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x2.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if mem != x2.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               if s2.AuxInt != 24 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZloadidx {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x3.AuxInt != i-3 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x3.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x3.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if mem != x3.Args[2] {
+               if p != x0.Args[1] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               if s3.AuxInt != 32 {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZloadidx {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x4.AuxInt != i-4 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x4.Aux != s {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x4.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if idx != x4.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if mem != x4.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s4.AuxInt != 40 {
+               if idx != x0.Args[0] {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZloadidx {
+               if p != x0.Args[1] {
                        break
                }
-               if x5.AuxInt != i-5 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x5.Aux != s {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if p != x5.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
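+	// When the paired byte loads sit inside a larger ORW chain, the rules
+	// below still combine them: two shifted MOVBZloadidx loads with i1 == i0+1,
+	// j1 == j0+8 and j0 a multiple of 16 fold into SLWconst [j0] of a
+	// MOVHZreg(MOVHBRloadidx), which is ORWed back with the remaining operand y.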
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x5.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if mem != x5.Args[2] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s5.AuxInt != 48 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x6.AuxInt != i-6 {
+               if p != x0.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x6.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x6.Args[1] {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x6.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s6.AuxInt != 56 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZloadidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x7.AuxInt != i-7 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 7
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XORW(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORW x (MOVDconst [c]))
-       // cond:
-       // result: (ORWconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpS390XORWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW (MOVDconst [c]) x)
-       // cond:
-       // result: (ORWconst [c] x)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpS390XORWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x != v_1.Args[0] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XRLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSLWconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               if p != x0.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORW x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWload {
-                       break
-               }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWZload {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if idx != x0.Args[0] {
                        break
-               }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW x:(SLWconst _) y)
-       // cond: y.Op != OpS390XSLWconst
-       // result: (ORW y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XSLWconst {
+               }
+               if p != x0.Args[1] {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpS390XSLWconst) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XORW)
-               v.AddArg(y)
-               v.AddArg(x)
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
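	// The remaining variants of this rule are the same except that y is
	// matched as the first operand of the inner ORW rather than the second.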
-       // match: (ORW                 x0:(MOVBZload [i]   {s} p mem)     s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i+1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRload [i] {s} p mem))     s0:(SLWconst [16] x1:(MOVBZload [i+2] {s} p mem)))     s1:(SLWconst [24] x2:(MOVBZload [i+3] {s} p mem)))
-       // cond: p.Op != OpSB   && z0.Uses == 1   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(z0)   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload [i] {s} p mem)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               z0 := o0.Args[0]
-               if z0.Op != OpS390XMOVHZreg {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x0 := z0.Args[0]
-               if x0.Op != OpS390XMOVHBRload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.AuxInt != i+2 {
+               if p != x0.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if mem != x1.Args[1] {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s1 := v.Args[1]
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
                if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x2.AuxInt != i+3 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x2.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if !(p.Op != OpSB && z0.Uses == 1 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(z0) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if p != x0.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx <v.Type> [i] {s} p idx mem))
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i+1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, v.Type)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRloadidx [i] {s} p idx mem))     s0:(SLWconst [16] x1:(MOVBZloadidx [i+2] {s} p idx mem)))     s1:(SLWconst [24] x2:(MOVBZloadidx [i+3] {s} p idx mem)))
-       // cond: z0.Uses == 1   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(z0)   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               z0 := o0.Args[0]
-               if z0.Op != OpS390XMOVHZreg {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x0 := z0.Args[0]
-               if x0.Op != OpS390XMOVHBRloadidx {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpS390XSLWconst {
-                       break
-               }
-               if s0.AuxInt != 16 {
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               x1 := s0.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.AuxInt != i+2 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -16303,305 +32017,495 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if s1.AuxInt != 24 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x2.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(z0.Uses == 1 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(z0) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, v.Type)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                  x0:(MOVBZload [i]   {s} p mem)     s0:(SLWconst [8] x1:(MOVBZload [i-1] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZload [i-1] {s} p mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 1
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW x0:(MOVHZload [i] {s} p mem)     s0:(SLWconst [16] x1:(MOVBZload [i-1] {s} p mem)))     s1:(SLWconst [24] x2:(MOVBZload [i-2] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZload [i-2] {s} p mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpS390XMOVHZload {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               if mem != x1.Args[2] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x2.AuxInt != i-2 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x2.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 2
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZloadidx <v.Type> [i-1] {s} p idx mem)
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
+               s0 := or.Args[0]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 1
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW x0:(MOVHZloadidx [i] {s} p idx mem)     s0:(SLWconst [16] x1:(MOVBZloadidx [i-1] {s} p idx mem)))     s1:(SLWconst [24] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZloadidx <v.Type> [i-2] {s} p idx mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpS390XMOVHZloadidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if idx != x1.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if p != x1.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               if idx != x1.Args[1] {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x1.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x2.AuxInt != i-2 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x2.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 2
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        return false
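The long run of near-identical ORW blocks above is the expansion of a single pairwise load-combining rule: two zero-extended byte loads from adjacent addresses, the higher-addressed one shifted eight bits further left, collapse into one byte-reversed halfword load. One generated variant, taken from its match/cond/result comment above and wrapped here for readability (the others differ only in the order of the commutative ORW arguments and of the p/idx operands of the indexed load):

  (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
       s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
    && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0
    && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)
    -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)

The j0 % 16 == 0 requirement keeps the merged halfword at an even bit position, presumably so that a later rule can merge two such halfwords into a word. The exact spelling in S390X.rules is not part of this hunk, so the form above should be read as a sketch reconstructed from the generated code.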
@@ -17438,9 +33342,9 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
+       // match: (XOR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLDconst {
@@ -17452,10 +33356,11 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                if v_1.Op != OpS390XSRDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
@@ -17463,28 +33368,29 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (XOR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRDconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
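Both rotate blocks above now come from one source rule: the right-shift amount is bound to a variable d and checked in the condition instead of being written as the literal [64-c], which lets the generator emit the mirrored operand order automatically rather than keeping a second hand-written rule. Reconstructed from the match/cond/result comments (a sketch, not a verbatim quote of S390X.rules):

  (XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)

The second block, matching (XOR (SRDconst x [d]) (SLDconst x [c])), is the auto-generated commutation, and the 32-bit XORW/RLLconst pair further down follows the same pattern with d == 32-c.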
@@ -17506,6 +33412,24 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
+       // match: (XOR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c^d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c ^ d
+               return true
+       }
        // match: (XOR x x)
        // cond:
        // result: (MOVDconst [0])
@@ -17570,6 +33494,58 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XOR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XOR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XXORW(v *Value) bool {
@@ -17603,9 +33579,9 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
+       // match: (XORW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLWconst {
@@ -17617,10 +33593,11 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                if v_1.Op != OpS390XSRWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
@@ -17628,28 +33605,29 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
+       // match: (XORW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17717,6 +33695,58 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (XORW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (XORWload <t> [off] {sym} x ptr mem)
@@ -17769,6 +33799,58 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XXORWconst(v *Value) bool {
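The new XORW load-folding blocks above are the commuted counterparts of the existing ones: because XORW is marked commutative, rulegen now emits the match with the MOVWload or MOVWZload in either argument position instead of relying on operand canonicalization. The underlying rule shape, as recorded in the comments (a sketch, not a verbatim quote of S390X.rules):

  (XORW <t> x g:(MOVWZload [off] {sym} ptr mem))
    && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
    -> (XORWload <t> [off] {sym} x ptr mem)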
@@ -17874,7 +33956,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpSelect1(v *Value) bool {
-       // match: (Select1     (AddTupleFirst32 tuple _  ))
+       // match: (Select1 (AddTupleFirst32 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -17887,7 +33969,7 @@ func rewriteValueS390X_OpSelect1(v *Value) bool {
                v.AddArg(tuple)
                return true
        }
-       // match: (Select1     (AddTupleFirst64 tuple _  ))
+       // match: (Select1 (AddTupleFirst64 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -17936,7 +34018,7 @@ func rewriteValueS390X_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -17947,7 +34029,7 @@ func rewriteValueS390X_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -17958,7 +34040,7 @@ func rewriteValueS390X_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -18117,7 +34199,7 @@ func rewriteValueS390X_OpStore(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18130,7 +34212,7 @@ func rewriteValueS390X_OpSub16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18156,7 +34238,7 @@ func rewriteValueS390X_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUB  x y)
        for {
@@ -18182,7 +34264,7 @@ func rewriteValueS390X_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18208,7 +34290,7 @@ func rewriteValueS390X_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -18232,7 +34314,7 @@ func rewriteValueS390X_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -18268,7 +34350,7 @@ func rewriteValueS390X_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: x
        for {
@@ -18319,7 +34401,7 @@ func rewriteValueS390X_OpXor64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORW x y)
        for {
@@ -18555,7 +34637,7 @@ func rewriteValueS390X_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -18566,7 +34648,7 @@ func rewriteValueS390X_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -18577,7 +34659,7 @@ func rewriteValueS390X_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBZreg x)
        for {
index 2782316c7ee836c203dec7fccac3c0fc0a3caeff..2be17ef459fc64468b4142069a98e91b6e5aa7a0 100644 (file)
@@ -34,7 +34,7 @@ func rewriteValuedec(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpComplexImag(v *Value) bool {
-       // match: (ComplexImag (ComplexMake _ imag ))
+       // match: (ComplexImag (ComplexMake _ imag))
        // cond:
        // result: imag
        for {
@@ -51,7 +51,7 @@ func rewriteValuedec_OpComplexImag(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpComplexReal(v *Value) bool {
-       // match: (ComplexReal (ComplexMake real _  ))
+       // match: (ComplexReal (ComplexMake real _))
        // cond:
        // result: real
        for {
@@ -274,7 +274,7 @@ func rewriteValuedec_OpSliceLen(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpSlicePtr(v *Value) bool {
-       // match: (SlicePtr (SliceMake ptr _ _ ))
+       // match: (SlicePtr (SliceMake ptr _ _))
        // cond:
        // result: ptr
        for {
index 8ab751797b493127c61e0d9e3f0718947dd2e9a2..7f440875d073d65134c3806549a455e9380003dc 100644 (file)
@@ -422,7 +422,7 @@ func rewriteValuegeneric(v *Value) bool {
 func rewriteValuegeneric_OpAdd16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add16  (Const16 [c])  (Const16 [d]))
+       // match: (Add16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c+d))])
        for {
@@ -440,25 +440,22 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AuxInt = int64(int16(c + d))
                return true
        }
-       // match: (Add16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Add16 (Const16 <t> [c]) x)
+       // match: (Add16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpAdd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c + d))
                return true
        }
        // match: (Add16 (Const16 [0]) x)
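The generic Add16 rules show the same mechanism replacing the old canonicalization trick: the rule that swapped a trailing constant to the front, (Add16 x (Const16 <t> [c])) with x.Op != OpConst16, is deleted, and the constant fold is instead emitted for both argument orders. The source rule reduces to the plain fold (a sketch, not a verbatim quote of generic.rules):

  (Add16 (Const16 [c]) (Const16 [d])) -> (Const16 [int64(int16(c+d))])

with the reversed (Const16 [d]) (Const16 [c]) match generated automatically; the identity, Com16, and reassociation rules below pick up their commuted variants the same way.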
@@ -478,6 +475,23 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add16 (Const16 [1]) (Com16 x))
        // cond:
        // result: (Neg16 x)
@@ -498,20 +512,23 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add16 x l:(Add16 _ _))
-       // cond: (x.Op != OpAdd16 && x.Op != OpConst16)
-       // result: (Add16 l x)
+       // match: (Add16 (Com16 x) (Const16 [1]))
+       // cond:
+       // result: (Neg16 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpAdd16 && x.Op != OpConst16) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd16)
-               v.AddArg(l)
+               v.reset(OpNeg16)
                v.AddArg(x)
                return true
        }
@@ -541,6 +558,84 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add16 (Add16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 x (Add16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 x (Add16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add16 (Sub16 i:(Const16 <t>) z) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Add16 i (Sub16 <t> x z))
@@ -593,6 +688,58 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add16 x (Sub16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Sub16 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 (Sub16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Sub16 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add16 (Sub16 z i:(Const16 <t>)) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Sub16 (Add16 <t> x z) i)
@@ -645,39 +792,61 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x))
-       // cond:
-       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
+       // match: (Add16 x (Sub16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Sub16 (Add16 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd16 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add16 (Sub16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Sub16 (Add16 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x))
+       // match: (Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x))
        // cond:
-       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst16 {
@@ -686,7 +855,7 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub16 {
+               if v_1.Op != OpAdd16 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -698,16 +867,16 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub16)
+               v.reset(OpAdd16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d])))
+       // match: (Add16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
        // cond:
-       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst16 {
@@ -716,7 +885,7 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub16 {
+               if v_1.Op != OpAdd16 {
                        break
                }
                x := v_1.Args[0]
@@ -730,25 +899,205 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c - d))
+               v0.AuxInt = int64(int16(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd32(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (Add32  (Const32 [c])  (Const32 [d]))
+       // match: (Add16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
        // cond:
-       // result: (Const32 [int64(int32(c+d))])
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd16 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Sub16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Sub16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd32(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (Add32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (Const32 [int64(int32(c+d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
@@ -758,25 +1107,22 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AuxInt = int64(int32(c + d))
                return true
        }
-       // match: (Add32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Add32 (Const32 <t> [c]) x)
+       // match: (Add32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpAdd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c + d))
                return true
        }
        // match: (Add32 (Const32 [0]) x)
@@ -796,6 +1142,23 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add32 (Const32 [1]) (Com32 x))
        // cond:
        // result: (Neg32 x)
@@ -816,20 +1179,23 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add32 x l:(Add32 _ _))
-       // cond: (x.Op != OpAdd32 && x.Op != OpConst32)
-       // result: (Add32 l x)
+       // match: (Add32 (Com32 x) (Const32 [1]))
+       // cond:
+       // result: (Neg32 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpAdd32 && x.Op != OpConst32) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd32)
-               v.AddArg(l)
+               v.reset(OpNeg32)
                v.AddArg(x)
                return true
        }
@@ -859,39 +1225,39 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // match: (Add32 (Add32 z i:(Const32 <t>)) x)
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Add32 i (Sub32 <t> x z))
+       // result: (Add32 i (Add32 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
                v.reset(OpAdd32)
                v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpSub32, t)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
                v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // match: (Add32 x (Add32 i:(Const32 <t>) z))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Add32 i (Sub32 <t> x z))
+       // result: (Add32 i (Add32 <t> z x))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                i := v_1.Args[0]
@@ -905,22 +1271,152 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                }
                v.reset(OpAdd32)
                v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpSub32, t)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
                v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // match: (Add32 x (Add32 z i:(Const32 <t>)))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Sub32 (Add32 <t> x z) i)
+       // result: (Add32 i (Add32 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpSub32 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd32 {
                        break
                }
-               z := v_0.Args[0]
-               i := v_0.Args[1]
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
@@ -963,39 +1459,61 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x))
-       // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
+       // match: (Add32 x (Sub32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd32 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x))
+       // match: (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x))
        // cond:
-       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -1004,7 +1522,7 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1016,16 +1534,16 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub32)
+               v.reset(OpAdd32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d])))
+       // match: (Add32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
        // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -1034,7 +1552,7 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                x := v_1.Args[0]
@@ -1048,72 +1566,270 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c - d))
+               v0.AuxInt = int64(int32(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd32F(v *Value) bool {
-       // match: (Add32F (Const32F [c]) (Const32F [d]))
+       // match: (Add32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32F {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst32F)
-               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
-               return true
-       }
-       // match: (Add32F x (Const32F [0]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add32F (Const32F [0]) x)
+       // match: (Add32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32F {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpAdd64(v *Value) bool {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Sub32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Sub32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd32F(v *Value) bool {
+       // match: (Add32F (Const32F [c]) (Const32F [d]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
+               return true
+       }
+       // match: (Add32F (Const32F [d]) (Const32F [c]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
+               return true
+       }
+       // match: (Add32F x (Const32F [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32F (Const32F [0]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add64  (Const64 [c])  (Const64 [d]))
+       // match: (Add64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c+d])
        for {
@@ -1131,25 +1847,22 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AuxInt = c + d
                return true
        }
-       // match: (Add64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Add64 (Const64 <t> [c]) x)
+       // match: (Add64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c+d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c + d
                return true
        }
        // match: (Add64 (Const64 [0]) x)
@@ -1169,6 +1882,23 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add64 (Const64 [1]) (Com64 x))
        // cond:
        // result: (Neg64 x)
@@ -1189,20 +1919,23 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add64 x l:(Add64 _ _))
-       // cond: (x.Op != OpAdd64 && x.Op != OpConst64)
-       // result: (Add64 l x)
+       // match: (Add64 (Com64 x) (Const64 [1]))
+       // cond:
+       // result: (Neg64 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd64 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpAdd64 && x.Op != OpConst64) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd64)
-               v.AddArg(l)
+               v.reset(OpNeg64)
                v.AddArg(x)
                return true
        }
@@ -1232,12 +1965,90 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // match: (Add64 (Add64 z i:(Const64 <t>)) x)
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Add64 i (Sub64 <t> x z))
+       // result: (Add64 i (Add64 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub64 {
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 x (Add64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Add64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 x (Add64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Add64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
                        break
                }
                i := v_0.Args[0]
@@ -1284,6 +2095,58 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add64 x (Sub64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add64 (Sub64 z i:(Const64 <t>)) x)
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
        // result: (Sub64 (Add64 <t> x z) i)
@@ -1336,39 +2199,61 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x))
-       // cond:
-       // result: (Add64 (Const64 <t> [c+d]) x)
+       // match: (Add64 x (Sub64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Sub64 (Add64 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd64 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add64 (Sub64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Sub64 (Add64 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c + d
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x))
+       // match: (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x))
        // cond:
-       // result: (Sub64 (Const64 <t> [c+d]) x)
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -1377,7 +2262,7 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub64 {
+               if v_1.Op != OpAdd64 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1389,16 +2274,16 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub64)
+               v.reset(OpAdd64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c + d
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d])))
+       // match: (Add64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
        // cond:
-       // result: (Add64 (Const64 <t> [c-d]) x)
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -1407,7 +2292,7 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub64 {
+               if v_1.Op != OpAdd64 {
                        break
                }
                x := v_1.Args[0]
@@ -1421,72 +2306,270 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c - d
+               v0.AuxInt = c + d
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd64F(v *Value) bool {
-       // match: (Add64F (Const64F [c]) (Const64F [d]))
+       // match: (Add64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
        // cond:
-       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64F {
+               if v_0.Op != OpAdd64 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst64F)
-               v.AuxInt = f2i(i2f(c) + i2f(d))
-               return true
-       }
-       // match: (Add64F x (Const64F [0]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add64F (Const64F [0]) x)
+       // match: (Add64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64F {
+               if v_0.Op != OpAdd64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpAdd8(v *Value) bool {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Sub64 (Const64 <t> [c+d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Sub64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Sub64 (Const64 <t> [c+d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Add64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Sub64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd64F(v *Value) bool {
+       // match: (Add64F (Const64F [c]) (Const64F [d]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) + i2f(d))
+               return true
+       }
+       // match: (Add64F (Const64F [d]) (Const64F [c]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) + i2f(d))
+               return true
+       }
+       // match: (Add64F x (Const64F [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64F (Const64F [0]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
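The two Const64F folding blocks above differ only in which operand binds c and which binds d. As a minimal, self-contained sketch, one way to expand a single two-operand pattern into both operand orders (commute2 is a hypothetical helper used only for illustration, not the compiler's rule generator):

package main

import "fmt"

// commute2 returns both operand orders of a two-argument pattern.
// Hypothetical helper for illustration; not the actual generator.
func commute2(op, a, b string) []string {
	return []string{
		fmt.Sprintf("(%s %s %s)", op, a, b),
		fmt.Sprintf("(%s %s %s)", op, b, a),
	}
}

func main() {
	for _, p := range commute2("Add64F", "(Const64F [c])", "(Const64F [d])") {
		fmt.Println(p)
	}
	// Output:
	// (Add64F (Const64F [c]) (Const64F [d]))
	// (Add64F (Const64F [d]) (Const64F [c]))
}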
+func rewriteValuegeneric_OpAdd8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add8   (Const8 [c])   (Const8 [d]))
+       // match: (Add8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c+d))])
        for {
@@ -1504,28 +2587,25 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AuxInt = int64(int8(c + d))
                return true
        }
-       // match: (Add8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Add8  (Const8  <t> [c]) x)
+       // match: (Add8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpAdd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c + d))
                return true
        }
-       // match: (Add8  (Const8  [0]) x)
+       // match: (Add8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -1542,7 +2622,24 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Const8  [1]) (Com8  x))
+       // match: (Add8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Const8 [1]) (Com8 x))
        // cond:
        // result: (Neg8  x)
        for {
@@ -1562,24 +2659,27 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add8  x l:(Add8  _ _))
-       // cond: (x.Op != OpAdd8  && x.Op != OpConst8)
-       // result: (Add8  l x)
+       // match: (Add8 (Com8 x) (Const8 [1]))
+       // cond:
+       // result: (Neg8  x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpAdd8 && x.Op != OpConst8) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd8)
-               v.AddArg(l)
+               v.reset(OpNeg8)
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Add8  i:(Const8  <t>) z) x)
+       // match: (Add8 (Add8 i:(Const8 <t>) z) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Add8  <t> z x))
        for {
@@ -1605,12 +2705,90 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  (Sub8  i:(Const8  <t>) z) x)
+       // match: (Add8 (Add8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
-       // result: (Add8  i (Sub8  <t> x z))
+       // result: (Add8  i (Add8  <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub8 {
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Add8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Add8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Add8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Add8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
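The Add8-inside-Add8 blocks above all rewrite to (Add8 i (Add8 <t> z x)), lifting the Const8 operand i to the outer Add8 where the constant-folding blocks at the top of this function can reach it. A quick standalone check, in ordinary Go int8 arithmetic, that the reassociated form computes the same value (illustrative only):

package main

import "fmt"

func main() {
	// (Add8 x (Add8 z i)) versus the rewritten (Add8 i (Add8 z x)):
	// int8 addition is associative and commutative (it wraps on
	// overflow), so both shapes produce the same result.
	x, z := int8(5), int8(7)
	const i int8 = 3
	before := x + (z + i)
	after := i + (z + x)
	fmt.Println(before == after, after) // true 15
}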
+       // match: (Add8 (Sub8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
                        break
                }
                i := v_0.Args[0]
@@ -1631,7 +2809,33 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  x (Sub8  i:(Const8  <t>) z))
+       // match: (Add8 x (Sub8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Sub8 i:(Const8 <t>) z))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Sub8  <t> x z))
        for {
@@ -1657,7 +2861,33 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  (Sub8  z i:(Const8  <t>)) x)
+       // match: (Add8 (Sub8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 (Sub8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -1683,7 +2913,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add8  x (Sub8  z i:(Const8  <t>)))
+       // match: (Add8 x (Sub8 z i:(Const8 <t>)))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -1709,39 +2939,61 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
-       // cond:
-       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
+       // match: (Add8 x (Sub8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Sub8  (Add8  <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd8 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add8 (Sub8 z i:(Const8 <t>)) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Sub8  (Add8  <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = int64(int8(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x))
+       // match: (Add8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
        // cond:
-       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst8 {
@@ -1750,7 +3002,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub8 {
+               if v_1.Op != OpAdd8 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1762,16 +3014,16 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub8)
+               v.reset(OpAdd8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d])))
+       // match: (Add8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
        // cond:
-       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst8 {
@@ -1780,7 +3032,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub8 {
+               if v_1.Op != OpAdd8 {
                        break
                }
                x := v_1.Args[0]
@@ -1794,59 +3046,239 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = int64(int8(c - d))
+               v0.AuxInt = int64(int8(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAddPtr(v *Value) bool {
-       // match: (AddPtr <t> x (Const64 [c]))
+       // match: (Add8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
        // cond:
-       // result: (OffPtr <t> x [c])
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
-               t := v.Type
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpOffPtr)
-               v.Type = t
-               v.AuxInt = c
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (AddPtr <t> x (Const32 [c]))
+       // match: (Add8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
        // cond:
-       // result: (OffPtr <t> x [c])
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
-               t := v.Type
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpOffPtr)
-               v.Type = t
-               v.AuxInt = c
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAnd16(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (And16  (Const16 [c])  (Const16 [d]))
+       // match: (Add8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x))
        // cond:
-       // result: (Const16 [int64(int16(c&d))])
+       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Sub8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Sub8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
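Throughout this function the folded constant is written back to the 64-bit AuxInt as int64(int8(c+d)) or int64(int8(c-d)): the result is truncated to 8 bits and then sign-extended. A small standalone check of that arithmetic in ordinary Go, independent of the compiler:

package main

import "fmt"

// fold8 mirrors the AuxInt expression int64(int8(c + d)):
// add in 64 bits, truncate to int8, sign-extend back to int64.
func fold8(c, d int64) int64 {
	return int64(int8(c + d))
}

func main() {
	fmt.Println(fold8(100, 100)) // -56: 200 wraps around in int8
	fmt.Println(fold8(127, 1))   // -128: signed overflow wraps
	fmt.Println(fold8(-1, 1))    // 0
}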
+func rewriteValuegeneric_OpAddPtr(v *Value) bool {
+       // match: (AddPtr <t> x (Const64 [c]))
+       // cond:
+       // result: (OffPtr <t> x [c])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpOffPtr)
+               v.Type = t
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (AddPtr <t> x (Const32 [c]))
+       // cond:
+       // result: (OffPtr <t> x [c])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpOffPtr)
+               v.Type = t
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAnd16(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (And16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (Const16 [int64(int16(c&d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
                        break
                }
                c := v_0.AuxInt
@@ -1859,25 +3291,22 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AuxInt = int64(int16(c & d))
                return true
        }
-       // match: (And16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (And16 (Const16 <t> [c]) x)
+       // match: (And16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpAnd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c & d))
                return true
        }
        // match: (And16 x x)
@@ -1910,6 +3339,23 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And16 x (Const16 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
@@ -1925,6 +3371,21 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And16 _ (Const16 [0]))
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And16 x (And16 x y))
        // cond:
        // result: (And16 x y)
@@ -1979,7 +3440,7 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And16 (And16 x y) y)
+       // match: (And16 (And16 y x) x)
        // cond:
        // result: (And16 x y)
        for {
@@ -1987,9 +3448,9 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                if v_0.Op != OpAnd16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd16)
@@ -1997,24 +3458,33 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And16 x l:(And16 _ _))
-       // cond: (x.Op != OpAnd16 && x.Op != OpConst16)
-       // result: (And16 l x)
+       // match: (And16 (And16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpAnd16 && x.Op != OpConst16) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
                v.reset(OpAnd16)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And16 (And16 i:(Const16 <t>) z) x)
+       // match: (And16 (And16 z i:(Const16 <t>)) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (And16 i (And16 <t> z x))
        for {
@@ -2022,12 +3492,12 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                if v_0.Op != OpAnd16 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst16 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
@@ -2040,31 +3510,173 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x))
-       // cond:
-       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       // match: (And16 x (And16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
                v.reset(OpAnd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And16 x (And16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAnd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (Const16 <t> [c]) (And16 x (Const16 <t> [d])))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (And16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (And16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c & d))
                v.AddArg(v0)
                v.AddArg(x)
@@ -2075,7 +3687,7 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
 func rewriteValuegeneric_OpAnd32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (And32  (Const32 [c])  (Const32 [d]))
+       // match: (And32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c&d))])
        for {
@@ -2093,25 +3705,22 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AuxInt = int64(int32(c & d))
                return true
        }
-       // match: (And32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (And32 (Const32 <t> [c]) x)
+       // match: (And32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpAnd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c & d))
                return true
        }
        // match: (And32 x x)
@@ -2144,6 +3753,23 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And32 x (Const32 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
@@ -2159,6 +3785,21 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And32 _ (Const32 [0]))
+       // cond:
+       // result: (Const32 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And32 x (And32 x y))
        // cond:
        // result: (And32 x y)
@@ -2213,7 +3854,7 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And32 (And32 x y) y)
+       // match: (And32 (And32 y x) x)
        // cond:
        // result: (And32 x y)
        for {
@@ -2221,9 +3862,9 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd32)
@@ -2231,24 +3872,33 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And32 x l:(And32 _ _))
-       // cond: (x.Op != OpAnd32 && x.Op != OpConst32)
-       // result: (And32 l x)
+       // match: (And32 (And32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpAnd32 && x.Op != OpConst32) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
                v.reset(OpAnd32)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And32 (And32 i:(Const32 <t>) z) x)
+       // match: (And32 (And32 z i:(Const32 <t>)) x)
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
        // result: (And32 i (And32 <t> z x))
        for {
@@ -2256,12 +3906,12 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
@@ -2274,44 +3924,186 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x))
-       // cond:
-       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       // match: (And32 x (And32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
                v.reset(OpAnd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAnd64(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (And64  (Const64 [c])  (Const64 [d]))
-       // cond:
-       // result: (Const64 [c&d])
+       // match: (And32 x (And32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAnd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (Const32 <t> [c]) (And32 x (Const32 <t> [d])))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (And32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (And32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAnd64(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (And64 (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (Const64 [c&d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -2327,25 +4119,22 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
-       // match: (And64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (And64 (Const64 <t> [c]) x)
+       // match: (And64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c&d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpAnd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c & d
                return true
        }
        // match: (And64 x x)
@@ -2378,6 +4167,23 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And64 x (Const64 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
@@ -2393,6 +4199,21 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And64 _ (Const64 [0]))
+       // cond:
+       // result: (Const64 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And64 x (And64 x y))
        // cond:
        // result: (And64 x y)
@@ -2447,7 +4268,7 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And64 (And64 x y) y)
+       // match: (And64 (And64 y x) x)
        // cond:
        // result: (And64 x y)
        for {
@@ -2455,9 +4276,9 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                if v_0.Op != OpAnd64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd64)
@@ -2491,6 +4312,32 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v2)
                return true
        }
+       // match: (And64 <t> x (Const64 [y]))
+       // cond: nlz(y) + nto(y) == 64 && nto(y) >= 32
+       // result: (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               y := v_1.AuxInt
+               if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = nlz(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = nlz(y)
+               v.AddArg(v2)
+               return true
+       }
        // match: (And64 <t> (Const64 [y]) x)
        // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
        // result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
@@ -2517,21 +4364,30 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v2)
                return true
        }
-       // match: (And64 x l:(And64 _ _))
-       // cond: (x.Op != OpAnd64 && x.Op != OpConst64)
-       // result: (And64 l x)
+       // match: (And64 <t> x (Const64 [y]))
+       // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
+       // result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
        for {
+               t := v.Type
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpAnd64 && x.Op != OpConst64) {
+               y := v_1.AuxInt
+               if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) {
                        break
                }
-               v.reset(OpAnd64)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpLsh64x64)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = ntz(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = ntz(y)
+               v.AddArg(v2)
                return true
        }
        // match: (And64 (And64 i:(Const64 <t>) z) x)
@@ -2560,23 +4416,101 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x))
-       // cond:
-       // result: (And64 (Const64 <t> [c&d]) x)
+       // match: (And64 (And64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAnd64 {
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
-                       break
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 x (And64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 x (And64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
                }
                if v_1_0.Type != t {
                        break
@@ -2590,12 +4524,102 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And64 (Const64 <t> [c]) (And64 x (Const64 <t> [d])))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And64 (And64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And64 (And64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpAnd8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (And8   (Const8 [c])   (Const8 [d]))
+       // match: (And8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c&d))])
        for {
@@ -2613,28 +4637,25 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AuxInt = int64(int8(c & d))
                return true
        }
-       // match: (And8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (And8  (Const8  <t> [c]) x)
+       // match: (And8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpAnd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c & d))
                return true
        }
-       // match: (And8  x x)
+       // match: (And8 x x)
        // cond:
        // result: x
        for {
@@ -2647,7 +4668,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (And8  (Const8  [-1]) x)
+       // match: (And8 (Const8 [-1]) x)
        // cond:
        // result: x
        for {
@@ -2664,7 +4685,24 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (And8  (Const8  [0]) _)
+       // match: (And8 x (Const8 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -2679,7 +4717,22 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (And8  x (And8  x y))
+       // match: (And8 _ (Const8 [0]))
+       // cond:
+       // result: (Const8  [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst8)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (And8 x (And8 x y))
        // cond:
        // result: (And8  x y)
        for {
@@ -2697,7 +4750,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  x (And8  y x))
+       // match: (And8 x (And8 y x))
        // cond:
        // result: (And8  x y)
        for {
@@ -2715,7 +4768,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  (And8  x y) x)
+       // match: (And8 (And8 x y) x)
        // cond:
        // result: (And8  x y)
        for {
@@ -2733,7 +4786,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  (And8  x y) y)
+       // match: (And8 (And8 y x) x)
        // cond:
        // result: (And8  x y)
        for {
@@ -2741,9 +4794,9 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                if v_0.Op != OpAnd8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd8)
@@ -2751,24 +4804,33 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  x l:(And8  _ _))
-       // cond: (x.Op != OpAnd8  && x.Op != OpConst8)
-       // result: (And8  l x)
+       // match: (And8 (And8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpAnd8 && x.Op != OpConst8) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
                v.reset(OpAnd8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And8  (And8  i:(Const8  <t>) z) x)
+       // match: (And8 (And8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (And8  i (And8  <t> z x))
        for {
@@ -2776,12 +4838,12 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                if v_0.Op != OpAnd8 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst8 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
@@ -2794,25 +4856,77 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And8  (Const8  <t> [c]) (And8  (Const8  <t> [d]) x))
-       // cond:
-       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       // match: (And8 x (And8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAnd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And8 x (And8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAnd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And8 (Const8 <t> [c]) (And8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
                        break
                }
                d := v_1_0.AuxInt
@@ -2824,6 +4938,96 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And8 (Const8 <t> [c]) (And8 x (Const8 <t> [d])))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (And8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (And8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpArg(v *Value) bool {
@@ -3175,7 +5379,7 @@ func rewriteValuegeneric_OpCom64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpCom8(v *Value) bool {
-       // match: (Com8  (Com8  x))
+       // match: (Com8 (Com8 x))
        // cond:
        // result: x
        for {
@@ -3438,7 +5642,7 @@ func rewriteValuegeneric_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  (Const16 [c])  (Const16 [d]))
+       // match: (Div16 (Const16 [c]) (Const16 [d]))
        // cond: d != 0
        // result: (Const16 [int64(int16(c)/int16(d))])
        for {
@@ -3589,7 +5793,7 @@ func rewriteValuegeneric_OpDiv16u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16u (Const16 [c])  (Const16 [d]))
+       // match: (Div16u (Const16 [c]) (Const16 [d]))
        // cond: d != 0
        // result: (Const16 [int64(int16(uint16(c)/uint16(d)))])
        for {
@@ -3770,7 +5974,7 @@ func rewriteValuegeneric_OpDiv32(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  (Const32 [c])  (Const32 [d]))
+       // match: (Div32 (Const32 [c]) (Const32 [d]))
        // cond: d != 0
        // result: (Const32 [int64(int32(c)/int32(d))])
        for {
@@ -4048,7 +6252,7 @@ func rewriteValuegeneric_OpDiv32u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32u (Const32 [c])  (Const32 [d]))
+       // match: (Div32u (Const32 [c]) (Const32 [d]))
        // cond: d != 0
        // result: (Const32 [int64(int32(uint32(c)/uint32(d)))])
        for {
@@ -4284,7 +6488,7 @@ func rewriteValuegeneric_OpDiv64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64  (Const64 [c])  (Const64 [d]))
+       // match: (Div64 (Const64 [c]) (Const64 [d]))
        // cond: d != 0
        // result: (Const64 [c/d])
        for {
@@ -4523,7 +6727,7 @@ func rewriteValuegeneric_OpDiv64u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64u (Const64 [c])  (Const64 [d]))
+       // match: (Div64u (Const64 [c]) (Const64 [d]))
        // cond: d != 0
        // result: (Const64 [int64(uint64(c)/uint64(d))])
        for {
@@ -4657,7 +6861,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   (Const8  [c])  (Const8  [d]))
+       // match: (Div8 (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(c)/int8(d))])
        for {
@@ -4678,7 +6882,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AuxInt = int64(int8(c) / int8(d))
                return true
        }
-       // match: (Div8  <t> n (Const8  [c]))
+       // match: (Div8 <t> n (Const8 [c]))
        // cond: c < 0 && c != -1<<7
        // result: (Neg8  (Div8  <t> n (Const8  <t> [-c])))
        for {
@@ -4701,7 +6905,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Div8  <t> x (Const8  [-1<<7 ]))
+       // match: (Div8 <t> x (Const8 [-1<<7 ]))
        // cond:
        // result: (Rsh8Ux64  (And8  <t> x (Neg8  <t> x)) (Const64 <types.UInt64> [7 ]))
        for {
@@ -4726,7 +6930,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AddArg(v2)
                return true
        }
-       // match: (Div8  <t> n (Const8  [c]))
+       // match: (Div8 <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c)
        // result: (Rsh8x64     (Add8  <t> n (Rsh8Ux64  <t> (Rsh8x64  <t> n (Const64 <types.UInt64> [ 7])) (Const64 <types.UInt64> [ 8-log2(c)])))     (Const64 <types.UInt64> [log2(c)]))
        for {
@@ -4806,7 +7010,7 @@ func rewriteValuegeneric_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  (Const8  [c])  (Const8  [d]))
+       // match: (Div8u (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(uint8(c)/uint8(d)))])
        for {
@@ -4827,7 +7031,7 @@ func rewriteValuegeneric_OpDiv8u(v *Value) bool {
                v.AuxInt = int64(int8(uint8(c) / uint8(d)))
                return true
        }
-       // match: (Div8u  n (Const8  [c]))
+       // match: (Div8u n (Const8 [c]))
        // cond: isPowerOfTwo(c&0xff)
        // result: (Rsh8Ux64 n  (Const64 <types.UInt64> [log2(c&0xff)]))
        for {
@@ -4923,23 +7127,92 @@ func rewriteValuegeneric_OpEq16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Eq16 (Const16 <t> [c]) x)
+       // match: (Eq16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               v.reset(OpEq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpEq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int16(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -4962,6 +7235,24 @@ func rewriteValuegeneric_OpEq16(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq32(v *Value) bool {
@@ -5009,30 +7300,99 @@ func rewriteValuegeneric_OpEq32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Eq32 (Const32 <t> [c]) x)
+       // match: (Eq32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
                v.reset(OpEq32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int32(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Eq32 (Const32 [c]) (Const32 [d]))
+       // match: (Eq32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: (ConstBool [b2i(c == d)])
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpEq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpEq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -5048,6 +7408,24 @@ func rewriteValuegeneric_OpEq32(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq64(v *Value) bool {
@@ -5095,23 +7473,92 @@ func rewriteValuegeneric_OpEq64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Eq64 (Const64 <t> [c]) x)
+       // match: (Eq64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               v.reset(OpEq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpEq64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v0.AuxInt = c - d
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -5134,12 +7581,30 @@ func rewriteValuegeneric_OpEq64(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8  x x)
+       // match: (Eq8 x x)
        // cond:
        // result: (ConstBool [1])
        for {
@@ -5151,7 +7616,7 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (Eq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
+       // match: (Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
        // cond:
        // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
        for {
@@ -5181,28 +7646,97 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Eq8  (Const8  <t> [c]) x)
+       // match: (Eq8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               v.reset(OpEq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
                        break
                }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpEq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int8(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Eq8  (Const8  [c]) (Const8  [d]))
+       // match: (Eq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c == d)])
        for {
@@ -5220,6 +7754,24 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEqB(v *Value) bool {
@@ -5366,8 +7918,30 @@ func rewriteValuegeneric_OpEqPtr(v *Value) bool {
                v.AuxInt = b2i(a == b)
                return true
        }
-       return false
-}
+       // match: (EqPtr (Addr {b} x) (Addr {a} x))
+       // cond:
+       // result: (ConstBool [b2i(a == b)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAddr {
+                       break
+               }
+               b := v_0.Aux
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAddr {
+                       break
+               }
+               a := v_1.Aux
+               if x != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(a == b)
+               return true
+       }
+       return false
+}
 func rewriteValuegeneric_OpEqSlice(v *Value) bool {
        b := v.Block
        _ = b
@@ -5516,7 +8090,7 @@ func rewriteValuegeneric_OpGeq64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGeq8(v *Value) bool {
-       // match: (Geq8  (Const8  [c]) (Const8  [d]))
+       // match: (Geq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c >= d)])
        for {
@@ -5537,7 +8111,7 @@ func rewriteValuegeneric_OpGeq8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGeq8U(v *Value) bool {
-       // match: (Geq8U  (Const8  [c]) (Const8  [d]))
+       // match: (Geq8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  >= uint8(d))])
        for {
@@ -5684,7 +8258,7 @@ func rewriteValuegeneric_OpGreater64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGreater8(v *Value) bool {
-       // match: (Greater8  (Const8  [c]) (Const8  [d]))
+       // match: (Greater8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c > d)])
        for {
@@ -5705,7 +8279,7 @@ func rewriteValuegeneric_OpGreater8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGreater8U(v *Value) bool {
-       // match: (Greater8U  (Const8  [c]) (Const8  [d]))
+       // match: (Greater8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  > uint8(d))])
        for {
@@ -5803,7 +8377,7 @@ func rewriteValuegeneric_OpInterCall(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
-       // match: (IsInBounds (ZeroExt8to32  _) (Const32 [c]))
+       // match: (IsInBounds (ZeroExt8to32 _) (Const32 [c]))
        // cond: (1 << 8)  <= c
        // result: (ConstBool [1])
        for {
@@ -5823,7 +8397,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to64  _) (Const64 [c]))
+       // match: (IsInBounds (ZeroExt8to64 _) (Const64 [c]))
        // cond: (1 << 8)  <= c
        // result: (ConstBool [1])
        for {
@@ -5895,7 +8469,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (IsInBounds                (And8  (Const8  [c]) _)  (Const8  [d]))
+       // match: (IsInBounds (And8 (Const8 [c]) _) (Const8 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
@@ -5920,7 +8494,32 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to16  (And8  (Const8  [c]) _)) (Const16 [d]))
+       // match: (IsInBounds (And8 _ (Const8 [c])) (Const8 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsInBounds (ZeroExt8to16 (And8 (Const8 [c]) _)) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
@@ -5949,25 +8548,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to32  (And8  (Const8  [c]) _)) (Const32 [d]))
+       // match: (IsInBounds (ZeroExt8to16 (And8 _ (Const8 [c]))) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt8to32 {
+               if v_0.Op != OpZeroExt8to16 {
                        break
                }
                v_0_0 := v_0.Args[0]
                if v_0_0.Op != OpAnd8 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst8 {
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -5978,12 +8577,12 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to64  (And8  (Const8  [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (ZeroExt8to32 (And8 (Const8 [c]) _)) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt8to64 {
+               if v_0.Op != OpZeroExt8to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
@@ -5996,7 +8595,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                }
                c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -6007,21 +8606,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And16 (Const16 [c]) _)  (Const16 [d]))
+       // match: (IsInBounds (ZeroExt8to32 (And8 _ (Const8 [c]))) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd16 {
+               if v_0.Op != OpZeroExt8to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -6032,25 +8635,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d]))
+       // match: (IsInBounds (ZeroExt8to64 (And8 (Const8 [c]) _)) (Const64 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt16to32 {
+               if v_0.Op != OpZeroExt8to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
                v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst16 {
+               if v_0_0_0.Op != OpConst8 {
                        break
                }
                c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
@@ -6061,23 +8664,23 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (ZeroExt8to64 (And8 _ (Const8 [c]))) (Const64 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt16to64 {
+               if v_0.Op != OpZeroExt8to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst16 {
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
@@ -6090,21 +8693,21 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And32 (Const32 [c]) _)  (Const32 [d]))
+       // match: (IsInBounds (And16 (Const16 [c]) _) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd32 {
+               if v_0.Op != OpAnd16 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               if v_0_0.Op != OpConst16 {
                        break
                }
                c := v_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -6115,25 +8718,21 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (And16 _ (Const16 [c])) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt32to64 {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd32 {
+               if v_0.Op != OpAnd16 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst32 {
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -6144,21 +8743,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And64 (Const64 [c]) _)  (Const64 [d]))
+       // match: (IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd64 {
+               if v_0.Op != OpZeroExt16to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst64 {
+               if v_0_0.Op != OpAnd16 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -6169,187 +8772,254 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Const32 [c]) (Const32 [d]))
-       // cond:
-       // result: (ConstBool [b2i(0 <= c && c < d)])
+       // match: (IsInBounds (ZeroExt16to32 (And16 _ (Const16 [c]))) (Const32 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpZeroExt16to32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c < d)
+               v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(0 <= c && c < d)])
+       // match: (IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpZeroExt16to64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c < d)
+               v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Mod32u _ y) y)
-       // cond:
+       // match: (IsInBounds (ZeroExt16to64 (And16 _ (Const16 [c]))) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpMod32u {
+               if v_0.Op != OpZeroExt16to64 {
                        break
                }
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Mod64u _ y) y)
-       // cond:
+       // match: (IsInBounds (And32 (Const32 [c]) _) (Const32 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpMod64u {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               v.reset(OpConstBool)
-               v.AuxInt = 1
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpIsNonNil(v *Value) bool {
-       // match: (IsNonNil (ConstNil))
-       // cond:
-       // result: (ConstBool [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConstNil {
+               c := v_0_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               v.reset(OpConstBool)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
-       // match: (IsSliceInBounds x x)
-       // cond:
-       // result: (ConstBool [1])
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d]))
-       // cond: 0 <= c && c <= d
+       // match: (IsInBounds (And32 _ (Const32 [c])) (Const32 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAnd32 {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               c := v_0_0.AuxInt
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
-               if !(0 <= c && c <= d) {
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d]))
-       // cond: 0 <= c && c <= d
+       // match: (IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd64 {
+               if v_0.Op != OpZeroExt32to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst64 {
+               if v_0_0.Op != OpAnd32 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
-               if !(0 <= c && c <= d) {
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const32 [0]) _)
-       // cond:
+       // match: (IsInBounds (ZeroExt32to64 (And32 _ (Const32 [c]))) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpZeroExt32to64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd32 {
                        break
                }
-               v.reset(OpConstBool)
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const64 [0]) _)
-       // cond:
+       // match: (IsInBounds (And64 (Const64 [c]) _) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const32 [c]) (Const32 [d]))
+       // match: (IsInBounds (And64 _ (Const64 [c])) (Const64 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsInBounds (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (ConstBool [b2i(0 <= c && c <= d)])
+       // result: (ConstBool [b2i(0 <= c && c < d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -6362,12 +9032,12 @@ func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c <= d)
+               v.AuxInt = b2i(0 <= c && c < d)
                return true
        }
-       // match: (IsSliceInBounds (Const64 [c]) (Const64 [d]))
+       // match: (IsInBounds (Const64 [c]) (Const64 [d]))
        // cond:
-       // result: (ConstBool [b2i(0 <= c && c <= d)])
+       // result: (ConstBool [b2i(0 <= c && c < d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -6380,227 +9050,455 @@ func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c <= d)
+               v.AuxInt = b2i(0 <= c && c < d)
                return true
        }
-       // match: (IsSliceInBounds (SliceLen x) (SliceCap x))
+       // match: (IsInBounds (Mod32u _ y) y)
        // cond:
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSliceLen {
-                       break
-               }
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpSliceCap {
+               if v_0.Op != OpMod32u {
                        break
                }
-               if x != v_1.Args[0] {
+               y := v_0.Args[1]
+               if y != v.Args[1] {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq16(v *Value) bool {
-       // match: (Leq16 (Const16 [c]) (Const16 [d]))
+       // match: (IsInBounds (Mod64u _ y) y)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpMod64u {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               y := v_0.Args[1]
+               if y != v.Args[1] {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
        return false
 }
-func rewriteValuegeneric_OpLeq16U(v *Value) bool {
-       // match: (Leq16U (Const16 [c]) (Const16 [d]))
+func rewriteValuegeneric_OpIsNonNil(v *Value) bool {
+       // match: (IsNonNil (ConstNil))
        // cond:
-       // result: (ConstBool [b2i(uint16(c) <= uint16(d))])
+       // result: (ConstBool [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_0.Op != OpConstNil {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint16(c) <= uint16(d))
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValuegeneric_OpLeq32(v *Value) bool {
-       // match: (Leq32 (Const32 [c]) (Const32 [d]))
+func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
+       // match: (IsSliceInBounds x x)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq32U(v *Value) bool {
-       // match: (Leq32U (Const32 [c]) (Const32 [d]))
-       // cond:
-       // result: (ConstBool [b2i(uint32(c) <= uint32(d))])
+       // match: (IsSliceInBounds (And32 _ (Const32 [c])) (Const32 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint32(c) <= uint32(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq64(v *Value) bool {
-       // match: (Leq64 (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // match: (IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq64U(v *Value) bool {
-       // match: (Leq64U (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(uint64(c) <= uint64(d))])
+       // match: (IsSliceInBounds (And64 _ (Const64 [c])) (Const64 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint64(c) <= uint64(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq8(v *Value) bool {
-       // match: (Leq8  (Const8  [c]) (Const8  [d]))
+       // match: (IsSliceInBounds (Const32 [0]) _)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               if v_0.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq8U(v *Value) bool {
-       // match: (Leq8U  (Const8  [c]) (Const8  [d]))
+       // match: (IsSliceInBounds (Const64 [0]) _)
        // cond:
-       // result: (ConstBool [b2i(uint8(c)  <= uint8(d))])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpConst64 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               if v_0.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint8(c) <= uint8(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLess16(v *Value) bool {
-       // match: (Less16 (Const16 [c]) (Const16 [d]))
+       // match: (IsSliceInBounds (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (ConstBool [b2i(c < d)])
+       // result: (ConstBool [b2i(0 <= c && c <= d)])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c < d)
+               v.AuxInt = b2i(0 <= c && c <= d)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLess16U(v *Value) bool {
-       // match: (Less16U (Const16 [c]) (Const16 [d]))
+       // match: (IsSliceInBounds (Const64 [c]) (Const64 [d]))
        // cond:
-       // result: (ConstBool [b2i(uint16(c) < uint16(d))])
+       // result: (ConstBool [b2i(0 <= c && c <= d)])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(0 <= c && c <= d)
+               return true
+       }
+       // match: (IsSliceInBounds (SliceLen x) (SliceCap x))
+       // cond:
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSliceLen {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSliceCap {
+                       break
+               }
+               if x != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq16(v *Value) bool {
+       // match: (Leq16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq16U(v *Value) bool {
+       // match: (Leq16U (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint16(c) <= uint16(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint16(c) <= uint16(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq32(v *Value) bool {
+       // match: (Leq32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq32U(v *Value) bool {
+       // match: (Leq32U (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint32(c) <= uint32(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint32(c) <= uint32(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq64(v *Value) bool {
+       // match: (Leq64 (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq64U(v *Value) bool {
+       // match: (Leq64U (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint64(c) <= uint64(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint64(c) <= uint64(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq8(v *Value) bool {
+       // match: (Leq8 (Const8 [c]) (Const8 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq8U(v *Value) bool {
+       // match: (Leq8U (Const8 [c]) (Const8 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint8(c)  <= uint8(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint8(c) <= uint8(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLess16(v *Value) bool {
+       // match: (Less16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c < d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c < d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLess16U(v *Value) bool {
+       // match: (Less16U (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint16(c) < uint16(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
                        break
                }
                c := v_0.AuxInt
@@ -6700,7 +9598,7 @@ func rewriteValuegeneric_OpLess64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpLess8(v *Value) bool {
-       // match: (Less8  (Const8  [c]) (Const8  [d]))
+       // match: (Less8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c < d)])
        for {
@@ -6721,7 +9619,7 @@ func rewriteValuegeneric_OpLess8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpLess8U(v *Value) bool {
-       // match: (Less8U  (Const8  [c]) (Const8  [d]))
+       // match: (Less8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  < uint8(d))])
        for {
@@ -6933,7 +9831,7 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x16  <t> x (Const16 [c]))
+       // match: (Lsh16x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -6951,7 +9849,7 @@ func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x16  (Const16 [0]) _)
+       // match: (Lsh16x16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -6971,7 +9869,7 @@ func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x32  <t> x (Const32 [c]))
+       // match: (Lsh16x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -6989,7 +9887,7 @@ func rewriteValuegeneric_OpLsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x32  (Const16 [0]) _)
+       // match: (Lsh16x32 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -7011,7 +9909,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x64  (Const16 [c]) (Const64 [d]))
+       // match: (Lsh16x64 (Const16 [c]) (Const64 [d]))
        // cond:
        // result: (Const16 [int64(int16(c) << uint64(d))])
        for {
@@ -7029,7 +9927,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AuxInt = int64(int16(c) << uint64(d))
                return true
        }
-       // match: (Lsh16x64  x (Const64 [0]))
+       // match: (Lsh16x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7046,7 +9944,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  (Const16 [0]) _)
+       // match: (Lsh16x64 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -7061,7 +9959,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh16x64  _ (Const64 [c]))
+       // match: (Lsh16x64 _ (Const64 [c]))
        // cond: uint64(c) >= 16
        // result: (Const16 [0])
        for {
@@ -7150,7 +10048,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8   <t> x (Const8  [c]))
+       // match: (Lsh16x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7168,7 +10066,7 @@ func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x8  (Const16 [0]) _)
+       // match: (Lsh16x8 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -7188,7 +10086,7 @@ func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x16  <t> x (Const16 [c]))
+       // match: (Lsh32x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7206,7 +10104,7 @@ func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x16  (Const32 [0]) _)
+       // match: (Lsh32x16 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7226,7 +10124,7 @@ func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x32  <t> x (Const32 [c]))
+       // match: (Lsh32x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7244,7 +10142,7 @@ func rewriteValuegeneric_OpLsh32x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x32  (Const32 [0]) _)
+       // match: (Lsh32x32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7266,7 +10164,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x64  (Const32 [c]) (Const64 [d]))
+       // match: (Lsh32x64 (Const32 [c]) (Const64 [d]))
        // cond:
        // result: (Const32 [int64(int32(c) << uint64(d))])
        for {
@@ -7284,7 +10182,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AuxInt = int64(int32(c) << uint64(d))
                return true
        }
-       // match: (Lsh32x64  x (Const64 [0]))
+       // match: (Lsh32x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7301,7 +10199,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  (Const32 [0]) _)
+       // match: (Lsh32x64 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7316,7 +10214,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh32x64  _ (Const64 [c]))
+       // match: (Lsh32x64 _ (Const64 [c]))
        // cond: uint64(c) >= 32
        // result: (Const32 [0])
        for {
@@ -7405,7 +10303,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8   <t> x (Const8  [c]))
+       // match: (Lsh32x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7423,7 +10321,7 @@ func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x8  (Const32 [0]) _)
+       // match: (Lsh32x8 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7443,7 +10341,7 @@ func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x16  <t> x (Const16 [c]))
+       // match: (Lsh64x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7461,7 +10359,7 @@ func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x16  (Const64 [0]) _)
+       // match: (Lsh64x16 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7481,7 +10379,7 @@ func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x32  <t> x (Const32 [c]))
+       // match: (Lsh64x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7499,7 +10397,7 @@ func rewriteValuegeneric_OpLsh64x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x32  (Const64 [0]) _)
+       // match: (Lsh64x32 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7521,7 +10419,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x64  (Const64 [c]) (Const64 [d]))
+       // match: (Lsh64x64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c << uint64(d)])
        for {
@@ -7539,7 +10437,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AuxInt = c << uint64(d)
                return true
        }
-       // match: (Lsh64x64  x (Const64 [0]))
+       // match: (Lsh64x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7556,7 +10454,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  (Const64 [0]) _)
+       // match: (Lsh64x64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7571,7 +10469,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh64x64  _ (Const64 [c]))
+       // match: (Lsh64x64 _ (Const64 [c]))
        // cond: uint64(c) >= 64
        // result: (Const64 [0])
        for {
@@ -7660,7 +10558,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x8   <t> x (Const8  [c]))
+       // match: (Lsh64x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7678,7 +10576,7 @@ func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x8  (Const64 [0]) _)
+       // match: (Lsh64x8 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7698,7 +10596,7 @@ func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x16  <t> x (Const16 [c]))
+       // match: (Lsh8x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7716,7 +10614,7 @@ func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x16   (Const8 [0]) _)
+       // match: (Lsh8x16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7736,7 +10634,7 @@ func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x32  <t> x (Const32 [c]))
+       // match: (Lsh8x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7754,7 +10652,7 @@ func rewriteValuegeneric_OpLsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x32   (Const8 [0]) _)
+       // match: (Lsh8x32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7776,7 +10674,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x64   (Const8  [c]) (Const64 [d]))
+       // match: (Lsh8x64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(c) << uint64(d))])
        for {
@@ -7794,7 +10692,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = int64(int8(c) << uint64(d))
                return true
        }
-       // match: (Lsh8x64   x (Const64 [0]))
+       // match: (Lsh8x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7811,7 +10709,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   (Const8 [0]) _)
+       // match: (Lsh8x64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7826,7 +10724,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64   _ (Const64 [c]))
+       // match: (Lsh8x64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (Const8  [0])
        for {
@@ -7842,7 +10740,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64  <t> (Lsh8x64  x (Const64 [c])) (Const64 [d]))
+       // match: (Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Lsh8x64  x (Const64 <t> [c+d]))
        for {
@@ -7915,7 +10813,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8   <t> x (Const8  [c]))
+       // match: (Lsh8x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7933,7 +10831,7 @@ func rewriteValuegeneric_OpLsh8x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x8   (Const8 [0]) _)
+       // match: (Lsh8x8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7996,7 +10894,7 @@ func rewriteValuegeneric_OpMod16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod16  <t> x (Const16 [c]))
+       // match: (Mod16 <t> x (Const16 [c]))
        // cond: x.Op != OpConst16 && (c > 0 || c == -1<<15)
        // result: (Sub16 x (Mul16 <t> (Div16  <t> x (Const16 <t> [c])) (Const16 <t> [c])))
        for {
@@ -8149,7 +11047,7 @@ func rewriteValuegeneric_OpMod32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod32  <t> x (Const32 [c]))
+       // match: (Mod32 <t> x (Const32 [c]))
        // cond: x.Op != OpConst32 && (c > 0 || c == -1<<31)
        // result: (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
        for {
@@ -8302,7 +11200,7 @@ func rewriteValuegeneric_OpMod64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod64  <t> x (Const64 [c]))
+       // match: (Mod64 <t> x (Const64 [c]))
        // cond: x.Op != OpConst64 && (c > 0 || c == -1<<63)
        // result: (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
        for {
@@ -8412,7 +11310,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
 func rewriteValuegeneric_OpMod8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Mod8  (Const8  [c]) (Const8  [d]))
+       // match: (Mod8 (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(c % d))])
        for {
@@ -8433,7 +11331,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
                v.AuxInt = int64(int8(c % d))
                return true
        }
-       // match: (Mod8  <t> n (Const8  [c]))
+       // match: (Mod8 <t> n (Const8 [c]))
        // cond: c < 0 && c != -1<<7
        // result: (Mod8  <t> n (Const8  <t> [-c]))
        for {
@@ -8455,7 +11353,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod8   <t> x (Const8  [c]))
+       // match: (Mod8 <t> x (Const8 [c]))
        // cond: x.Op != OpConst8  && (c > 0 || c == -1<<7)
        // result: (Sub8  x (Mul8  <t> (Div8   <t> x (Const8  <t> [c])) (Const8  <t> [c])))
        for {
@@ -8489,7 +11387,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
 func rewriteValuegeneric_OpMod8u(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Mod8u  (Const8 [c])  (Const8  [d]))
+       // match: (Mod8u (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(uint8(c) % uint8(d))])
        for {
@@ -8510,7 +11408,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                v.AuxInt = int64(uint8(c) % uint8(d))
                return true
        }
-       // match: (Mod8u  <t> n (Const8  [c]))
+       // match: (Mod8u <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c&0xff)
        // result: (And8 n (Const8 <t> [(c&0xff)-1]))
        for {
@@ -8531,66 +11429,433 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod8u  <t> x (Const8  [c]))
-       // cond: x.Op != OpConst8  && c > 0 && umagicOK(8 ,c)
-       // result: (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+       // match: (Mod8u <t> x (Const8 [c]))
+       // cond: x.Op != OpConst8  && c > 0 && umagicOK(8 ,c)
+       // result: (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst8 && c > 0 && umagicOK(8, c)) {
+                       break
+               }
+               v.reset(OpSub8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul8, t)
+               v1 := b.NewValue0(v.Pos, OpDiv8u, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst8, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst8, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpMul16(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (Mul16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (Const16 [int64(int16(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c * d))
+               return true
+       }
+       // match: (Mul16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c * d))
+               return true
+       }
+       // match: (Mul16 (Const16 [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 x (Const16 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Const16 [-1]) x)
+       // cond:
+       // result: (Neg16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpNeg16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 x (Const16 [-1]))
+       // cond:
+       // result: (Neg16 x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 <t> n (Const16 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh16x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> (Const16 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh16x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> n (Const16 [c]))
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+                       break
+               }
+               v.reset(OpNeg16)
+               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> (Const16 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+                       break
+               }
+               v.reset(OpNeg16)
+               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 (Const16 [0]) _)
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Mul16 _ (Const16 [0]))
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Const16 <t> [c]) (Mul16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Mul16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Mul16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
        for {
-               t := v.Type
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               if v_1.Op != OpConst16 {
                        break
                }
-               c := v_1.AuxInt
-               if !(x.Op != OpConst8 && c > 0 && umagicOK(8, c)) {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpSub8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpMul8, t)
-               v1 := b.NewValue0(v.Pos, OpDiv8u, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpConst8, t)
-               v2.AuxInt = c
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v3 := b.NewValue0(v.Pos, OpConst8, t)
-               v3.AuxInt = c
-               v0.AddArg(v3)
+               c := v_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
        return false
 }
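
(Editorial aside, not part of the commit diff.) The constant-folding matchers above store the folded product in the 64-bit AuxInt only after narrowing it to the operand width — int64(int16(c*d)) in the Mul16 cases — so the constant wraps exactly as the runtime multiplication would. A minimal, self-contained Go sketch of that computation, assuming (as the int64(int16(...)) conversions suggest) that AuxInt constants are kept sign-extended in an int64:

package main

import "fmt"

// foldMul16 mirrors the AuxInt computation in the Mul16 matchers above:
// narrow the product to 16 bits (wrapping), then sign-extend into an int64.
func foldMul16(c, d int64) int64 { return int64(int16(c * d)) }

func main() {
	c, d := int64(300), int64(300) // 300*300 = 90000, which wraps to 24464 in 16 bits
	fmt.Println(foldMul16(c, d))   // 24464

	// The folded constant matches what the un-folded multiplication
	// would produce at runtime on 16-bit values.
	x, y := int16(c), int16(d)
	fmt.Println(int64(x*y) == foldMul16(c, d)) // true
}
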
-func rewriteValuegeneric_OpMul16(v *Value) bool {
+func rewriteValuegeneric_OpMul32(v *Value) bool {
        b := v.Block
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul16  (Const16 [c])  (Const16 [d]))
+       // match: (Mul32 (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (Const16 [int64(int16(c*d))])
+       // result: (Const32 [int64(int32(c*d))])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
-               v.reset(OpConst16)
-               v.AuxInt = int64(int16(c * d))
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c * d))
                return true
        }
-       // match: (Mul16 (Const16 [1]) x)
+       // match: (Mul32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       // match: (Mul32 (Const32 [1]) x)
        // cond:
        // result: x
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                if v_0.AuxInt != 1 {
@@ -8602,37 +11867,70 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul16 (Const16 [-1]) x)
+       // match: (Mul32 x (Const32 [1]))
        // cond:
-       // result: (Neg16 x)
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul32 (Const32 [-1]) x)
+       // cond:
+       // result: (Neg32 x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                if v_0.AuxInt != -1 {
                        break
                }
                x := v.Args[1]
-               v.reset(OpNeg16)
+               v.reset(OpNeg32)
                v.AddArg(x)
                return true
        }
-       // match: (Mul16 <t> n (Const16 [c]))
+       // match: (Mul32 x (Const32 [-1]))
+       // cond:
+       // result: (Neg32 x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg32)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul32 <t> n (Const32 [c]))
        // cond: isPowerOfTwo(c)
-       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
                t := v.Type
                n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                c := v_1.AuxInt
                if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpLsh16x64)
+               v.reset(OpLsh32x64)
                v.Type = t
                v.AddArg(n)
                v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
@@ -8640,98 +11938,93 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul16 <t> n (Const16 [c]))
-       // cond: t.IsSigned() && isPowerOfTwo(-c)
-       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       // match: (Mul32 <t> (Const32 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
                t := v.Type
-               n := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpNeg16)
-               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
-               v0.AddArg(n)
-               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v1.AuxInt = log2(-c)
-               v0.AddArg(v1)
+               v.reset(OpLsh32x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
                v.AddArg(v0)
                return true
        }
-       // match: (Mul16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Mul16 (Const16 <t> [c]) x)
+       // match: (Mul32 <t> n (Const32 [c]))
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
+               t := v.Type
+               n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v.reset(OpNeg32)
+               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       // match: (Mul16 (Const16 [0]) _)
-       // cond:
-       // result: (Const16 [0])
+       // match: (Mul32 <t> (Const32 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
+               t := v.Type
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               if v_0.AuxInt != 0 {
-                       break
-               }
-               v.reset(OpConst16)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (Mul16 x l:(Mul16 _ _))
-       // cond: (x.Op != OpMul16 && x.Op != OpConst16)
-       // result: (Mul16 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul16 {
+               if v_0.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpMul16 && x.Op != OpConst16) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul16)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpNeg32)
+               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x))
+       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
        // cond:
-       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpMul16 {
+               if v_1.Op != OpAdd32 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               if v_1_0.Op != OpConst32 {
                        break
                }
                if v_1_0.Type != t {
@@ -8739,140 +12032,165 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpMul16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c * d))
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpMul32(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (Mul32  (Const32 [c])  (Const32 [d]))
+       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> x (Const32 <t> [d])))
        // cond:
-       // result: (Const32 [int64(int32(c*d))])
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
                        break
                }
+               t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst32)
-               v.AuxInt = int64(int32(c * d))
+               if v_1.Type != t {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 (Const32 [1]) x)
+       // match: (Mul32 (Add32 <t> (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != 1 {
+               t := v_0.Type
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               if v_0_0.Type != t {
+                       break
+               }
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 (Const32 [-1]) x)
+       // match: (Mul32 (Add32 <t> x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
-       // result: (Neg32 x)
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != -1 {
+               t := v_0.Type
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpNeg32)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Mul32 <t> n (Const32 [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
-       for {
-               t := v.Type
-               n := v.Args[0]
+               if v_0_1.Type != t {
+                       break
+               }
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpLsh32x64)
-               v.Type = t
-               v.AddArg(n)
-               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v0.AuxInt = log2(c)
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 <t> n (Const32 [c]))
-       // cond: t.IsSigned() && isPowerOfTwo(-c)
-       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       // match: (Mul32 (Const32 [0]) _)
+       // cond:
+       // result: (Const32 [0])
        for {
-               t := v.Type
-               n := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+               if v_0.AuxInt != 0 {
                        break
                }
-               v.reset(OpNeg32)
-               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
-               v0.AddArg(n)
-               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v1.AuxInt = log2(-c)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpConst32)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Mul32 (Const32 <t> [c]) x)
+       // match: (Mul32 _ (Const32 [0]))
+       // cond:
+       // result: (Const32 [0])
        for {
-               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
+       // match: (Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x))
        // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -8881,10 +12199,7 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpAdd32 {
-                       break
-               }
-               if v_1.Type != t {
+               if v_1.Op != OpMul32 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -8896,73 +12211,96 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpAdd32)
+               v.reset(OpMul32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpMul32, t)
-               v2 := b.NewValue0(v.Pos, OpConst32, t)
-               v2.AuxInt = c
-               v1.AddArg(v2)
-               v1.AddArg(x)
-               v.AddArg(v1)
+               v.AddArg(x)
                return true
        }
-       // match: (Mul32 (Const32 [0]) _)
+       // match: (Mul32 (Const32 <t> [c]) (Mul32 x (Const32 <t> [d])))
        // cond:
-       // result: (Const32 [0])
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
                        break
                }
-               v.reset(OpConst32)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Mul32 x l:(Mul32 _ _))
-       // cond: (x.Op != OpMul32 && x.Op != OpConst32)
-       // result: (Mul32 l x)
+       // match: (Mul32 (Mul32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpMul32 && x.Op != OpConst32) {
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpMul32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x))
+       // match: (Mul32 (Mul32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpMul32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpMul32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMul32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c * d))
@@ -8991,6 +12329,24 @@ func rewriteValuegeneric_OpMul32F(v *Value) bool {
                v.AuxInt = f2i(float64(i2f32(c) * i2f32(d)))
                return true
        }
+       // match: (Mul32F (Const32F [d]) (Const32F [c]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) * i2f32(d)))
+               return true
+       }
        // match: (Mul32F x (Const32F [f2i(1)]))
        // cond:
        // result: x
@@ -9064,7 +12420,7 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul64  (Const64 [c])  (Const64 [d]))
+       // match: (Mul64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c*d])
        for {
@@ -9082,6 +12438,24 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AuxInt = c * d
                return true
        }
+       // match: (Mul64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c*d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst64)
+               v.AuxInt = c * d
+               return true
+       }
        // match: (Mul64 (Const64 [1]) x)
        // cond:
        // result: x
@@ -9094,23 +12468,56 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                        break
                }
                x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 x (Const64 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Const64 [-1]) x)
+       // cond:
+       // result: (Neg64 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpNeg64)
                v.AddArg(x)
                return true
        }
-       // match: (Mul64 (Const64 [-1]) x)
+       // match: (Mul64 x (Const64 [-1]))
        // cond:
        // result: (Neg64 x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_0.AuxInt != -1 {
+               if v_1.AuxInt != -1 {
                        break
                }
-               x := v.Args[1]
                v.reset(OpNeg64)
                v.AddArg(x)
                return true
@@ -9137,6 +12544,28 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Mul64 <t> (Const64 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh64x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Mul64 <t> n (Const64 [c]))
        // cond: t.IsSigned() && isPowerOfTwo(-c)
        // result: (Neg64 (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
@@ -9160,25 +12589,27 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Mul64 (Const64 <t> [c]) x)
+       // match: (Mul64 <t> (Const64 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg64 (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v.reset(OpNeg64)
+               v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
        // match: (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x))
@@ -9219,6 +12650,120 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(v1)
                return true
        }
+       // match: (Mul64 (Const64 <t> [c]) (Add64 <t> x (Const64 <t> [d])))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Mul64 (Add64 <t> (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               t := v_0.Type
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               if v_0_0.Type != t {
+                       break
+               }
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Mul64 (Add64 <t> x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               t := v_0.Type
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               if v_0_1.Type != t {
+                       break
+               }
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
        // match: (Mul64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
@@ -9234,21 +12779,19 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Mul64 x l:(Mul64 _ _))
-       // cond: (x.Op != OpMul64 && x.Op != OpConst64)
-       // result: (Mul64 l x)
+       // match: (Mul64 _ (Const64 [0]))
+       // cond:
+       // result: (Const64 [0])
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpMul64 && x.Op != OpConst64) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul64)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = 0
                return true
        }
        // match: (Mul64 (Const64 <t> [c]) (Mul64 (Const64 <t> [d]) x))
@@ -9269,11 +12812,101 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                if v_1_0.Op != OpConst64 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Const64 <t> [c]) (Mul64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Mul64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Mul64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMul64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c * d
@@ -9302,6 +12935,24 @@ func rewriteValuegeneric_OpMul64F(v *Value) bool {
                v.AuxInt = f2i(i2f(c) * i2f(d))
                return true
        }
+       // match: (Mul64F (Const64F [d]) (Const64F [c]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) * i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) * i2f(d))
+               return true
+       }
        // match: (Mul64F x (Const64F [f2i(1)]))
        // cond:
        // result: x
@@ -9375,7 +13026,7 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul8   (Const8 [c])   (Const8 [d]))
+       // match: (Mul8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c*d))])
        for {
@@ -9393,7 +13044,25 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AuxInt = int64(int8(c * d))
                return true
        }
-       // match: (Mul8  (Const8  [1]) x)
+       // match: (Mul8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c * d))
+               return true
+       }
+       // match: (Mul8 (Const8 [1]) x)
        // cond:
        // result: x
        for {
@@ -9410,7 +13079,24 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul8  (Const8  [-1]) x)
+       // match: (Mul8 x (Const8 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Const8 [-1]) x)
        // cond:
        // result: (Neg8  x)
        for {
@@ -9426,7 +13112,23 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul8  <t> n (Const8  [c]))
+       // match: (Mul8 x (Const8 [-1]))
+       // cond:
+       // result: (Neg8  x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg8)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c)
        // result: (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
@@ -9448,7 +13150,29 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul8  <t> n (Const8  [c]))
+       // match: (Mul8 <t> (Const8 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh8x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul8 <t> n (Const8 [c]))
        // cond: t.IsSigned() && isPowerOfTwo(-c)
        // result: (Neg8  (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
@@ -9471,28 +13195,30 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Mul8  (Const8  <t> [c]) x)
+       // match: (Mul8 <t> (Const8 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg8  (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v.reset(OpNeg8)
+               v0 := b.NewValue0(v.Pos, OpLsh8x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       // match: (Mul8  (Const8  [0]) _)
+       // match: (Mul8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -9507,24 +13233,22 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Mul8  x l:(Mul8  _ _))
-       // cond: (x.Op != OpMul8  && x.Op != OpConst8)
-       // result: (Mul8  l x)
+       // match: (Mul8 _ (Const8 [0]))
+       // cond:
+       // result: (Const8  [0])
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul8 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpMul8 && x.Op != OpConst8) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul8  (Const8  <t> [c]) (Mul8  (Const8  <t> [d]) x))
+       // match: (Mul8 (Const8 <t> [c]) (Mul8 (Const8 <t> [d]) x))
        // cond:
        // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
        for {
@@ -9554,10 +13278,100 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Mul8 (Const8 <t> [c]) (Mul8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Mul8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Mul8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
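
(Editorial aside, not part of the commit diff.) The power-of-two rules in the Mul8/Mul32/Mul64 matchers above guard on isPowerOfTwo(c) and emit a left shift by log2(c), wrapping a Neg around the shift for signed types when -c is the power of two. The rewrite is sound because n*2^k == n<<k and n*(-2^k) == -(n<<k) under wrapping integer arithmetic. A small Go sketch of those identities; the isPowerOfTwo and log2 helpers here are stand-ins with assumed semantics, not the compiler's own:

package main

import "fmt"

// isPowerOfTwo reports whether c is a positive power of two (stand-in helper).
func isPowerOfTwo(c int64) bool { return c > 0 && c&(c-1) == 0 }

// log2 returns k such that c == 1<<k, for a positive power of two c (stand-in helper).
func log2(c int64) (k int64) {
	for c > 1 {
		c >>= 1
		k++
	}
	return k
}

func main() {
	n := int32(-12345)
	for _, c := range []int64{1, 2, 8, 1024, -2, -64} {
		switch {
		case isPowerOfTwo(c):
			// (Mul32 <t> n (Const32 [c])) -> (Lsh32x64 <t> n [log2(c)])
			fmt.Println(n*int32(c) == n<<uint(log2(c)))
		case isPowerOfTwo(-c):
			// signed case: (Mul32 <t> n (Const32 [c])) -> (Neg32 (Lsh32x64 <t> n [log2(-c)]))
			fmt.Println(n*int32(c) == -(n << uint(log2(-c))))
		}
	}
}
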
 func rewriteValuegeneric_OpNeg16(v *Value) bool {
-       // match: (Neg16  (Const16  [c]))
+       // match: (Neg16 (Const16 [c]))
        // cond:
        // result: (Const16  [int64(-int16(c))])
        for {
@@ -9588,7 +13402,7 @@ func rewriteValuegeneric_OpNeg16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg32(v *Value) bool {
-       // match: (Neg32  (Const32  [c]))
+       // match: (Neg32 (Const32 [c]))
        // cond:
        // result: (Const32  [int64(-int32(c))])
        for {
@@ -9638,7 +13452,7 @@ func rewriteValuegeneric_OpNeg32F(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg64(v *Value) bool {
-       // match: (Neg64  (Const64  [c]))
+       // match: (Neg64 (Const64 [c]))
        // cond:
        // result: (Const64  [-c])
        for {
@@ -9688,7 +13502,7 @@ func rewriteValuegeneric_OpNeg64F(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg8(v *Value) bool {
-       // match: (Neg8   (Const8   [c]))
+       // match: (Neg8 (Const8 [c]))
        // cond:
        // result: (Const8   [int64( -int8(c))])
        for {
@@ -9701,7 +13515,7 @@ func rewriteValuegeneric_OpNeg8(v *Value) bool {
                v.AuxInt = int64(-int8(c))
                return true
        }
-       // match: (Neg8  (Sub8  x y))
+       // match: (Neg8 (Sub8 x y))
        // cond:
        // result: (Sub8  y x)
        for {
@@ -9747,15 +13561,75 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                if v_1.Op != OpAdd16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpNeq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpNeq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c - d))
@@ -9763,23 +13637,32 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Neq16 (Const16 <t> [c]) x)
+       // match: (Neq16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpNeq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int16(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -9802,6 +13685,24 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
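
(Editorial aside, not part of the commit diff.) The Neq16 matchers above fold a constant addend through the comparison, rewriting c != x+d as c-d != x. Adding a fixed d is a bijection on fixed-width integers, so the identity holds exactly under wrapping arithmetic. A quick Go check over a few sample values, illustration only:

package main

import "fmt"

func main() {
	// (Neq16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
	//   -> (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
	ok := true
	for _, c := range []int16{0, 1, -1, 32767, -32768} {
		for _, d := range []int16{0, 5, -5, 30000} {
			for _, x := range []int16{0, 7, -32768, 32767} {
				lhs := c != x+d // original comparison (x+d wraps in int16)
				rhs := c-d != x // rewritten comparison (c-d wraps in int16)
				if lhs != rhs {
					ok = false
				}
			}
		}
	}
	fmt.Println(ok) // true: the rewrite never changes the result
}
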
 func rewriteValuegeneric_OpNeq32(v *Value) bool {
@@ -9849,23 +13750,92 @@ func rewriteValuegeneric_OpNeq32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Neq32 (Const32 <t> [c]) x)
+       // match: (Neq32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               v.reset(OpNeq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int32(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -9888,6 +13858,24 @@ func rewriteValuegeneric_OpNeq32(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeq64(v *Value) bool {
@@ -9935,23 +13923,92 @@ func rewriteValuegeneric_OpNeq64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Neq64 (Const64 <t> [c]) x)
+       // match: (Neq64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               v.reset(OpNeq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v0.AuxInt = c - d
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -9974,12 +14031,30 @@ func rewriteValuegeneric_OpNeq64(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8  x x)
+       // match: (Neq8 x x)
        // cond:
        // result: (ConstBool [0])
        for {
@@ -9991,7 +14066,37 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Neq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
+       // match: (Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpNeq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
        // cond:
        // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
        for {
@@ -10005,15 +14110,15 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                if v_1.Op != OpAdd8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               d := v_1_1.AuxInt
                v.reset(OpNeq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c - d))
@@ -10021,28 +14126,67 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq8  x (Const8 <t>  [c]))
-       // cond: x.Op != OpConst8
-       // result: (Neq8  (Const8  <t> [c]) x)
+       // match: (Neq8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               v.reset(OpNeq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int8(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Neq8  (Const8  [c]) (Const8  [d]))
+       // match: (Neq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c != d)])
        for {
@@ -10060,6 +14204,24 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
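
Aside (illustrative): each Neq function also gains a commuted twin of its constant/constant rule, which collapses the comparison straight to a ConstBool via the b2i helper. A standalone sketch of that fold; b2i mirrors the helper of the same name used by the generated code, while the surrounding program is purely for illustration:

package main

import "fmt"

// b2i converts a bool to the 0/1 auxint that a ConstBool value carries.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

func main() {
	// (Neq8 (Const8 [d]) (Const8 [c])) -> (ConstBool [b2i(c != d)])
	var c, d int8 = 3, 7
	fmt.Println(b2i(c != d)) // prints 1: the two constants differ
}
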
 func rewriteValuegeneric_OpNeqB(v *Value) bool {
@@ -10317,7 +14479,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Eq8  x y))
+       // match: (Not (Eq8 x y))
        // cond:
        // result: (Neq8  x y)
        for {
@@ -10332,7 +14494,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (EqB  x y))
+       // match: (Not (EqB x y))
        // cond:
        // result: (NeqB  x y)
        for {
@@ -10392,7 +14554,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Neq8  x y))
+       // match: (Not (Neq8 x y))
        // cond:
        // result: (Eq8  x y)
        for {
@@ -10407,7 +14569,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (NeqB  x y))
+       // match: (Not (NeqB x y))
        // cond:
        // result: (EqB  x y)
        for {
@@ -10467,7 +14629,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Greater8  x y))
+       // match: (Not (Greater8 x y))
        // cond:
        // result: (Leq8  x y)
        for {
@@ -10527,7 +14689,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Greater8U  x y))
+       // match: (Not (Greater8U x y))
        // cond:
        // result: (Leq8U  x y)
        for {
@@ -10587,7 +14749,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Geq8  x y))
+       // match: (Not (Geq8 x y))
        // cond:
        // result: (Less8  x y)
        for {
@@ -10647,7 +14809,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Geq8U  x y))
+       // match: (Not (Geq8U x y))
        // cond:
        // result: (Less8U  x y)
        for {
@@ -10707,7 +14869,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Less8  x y))
+       // match: (Not (Less8 x y))
        // cond:
        // result: (Geq8  x y)
        for {
@@ -10767,7 +14929,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Less8U  x y))
+       // match: (Not (Less8U x y))
        // cond:
        // result: (Geq8U  x y)
        for {
@@ -10827,7 +14989,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Leq8  x y))
+       // match: (Not (Leq8 x y))
        // cond:
        // result: (Greater8 x y)
        for {
@@ -10887,7 +15049,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Leq8U  x y))
+       // match: (Not (Leq8U x y))
        // cond:
        // result: (Greater8U  x y)
        for {
@@ -10942,7 +15104,7 @@ func rewriteValuegeneric_OpOffPtr(v *Value) bool {
 func rewriteValuegeneric_OpOr16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or16  (Const16 [c])  (Const16 [d]))
+       // match: (Or16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c|d))])
        for {
@@ -10960,25 +15122,22 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AuxInt = int64(int16(c | d))
                return true
        }
-       // match: (Or16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Or16 (Const16 <t> [c]) x)
+       // match: (Or16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpOr16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c | d))
                return true
        }
        // match: (Or16 x x)
@@ -11011,6 +15170,23 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or16 (Const16 [-1]) _)
        // cond:
        // result: (Const16 [-1])
@@ -11026,6 +15202,21 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or16 _ (Const16 [-1]))
+       // cond:
+       // result: (Const16 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = -1
+               return true
+       }
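
Aside (illustrative): the Or16 code likewise picks up commuted twins of the identity rules, because x|0 is x and x|-1 is -1 (-1 being the all-ones int16 mask). A tiny check, assuming nothing beyond plain Go int16 arithmetic:

package main

import "fmt"

func main() {
	var x int16 = 0x1234
	fmt.Println((x | 0) == x)   // OR with 0 leaves x untouched
	fmt.Println((x | -1) == -1) // OR with the all-ones mask saturates to -1
}
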
        // match: (Or16 x (Or16 x y))
        // cond:
        // result: (Or16 x y)
@@ -11080,7 +15271,7 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or16 (Or16 x y) y)
+       // match: (Or16 (Or16 y x) x)
        // cond:
        // result: (Or16 x y)
        for {
@@ -11088,9 +15279,9 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                if v_0.Op != OpOr16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpOr16)
@@ -11098,72 +15289,223 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or16 x l:(Or16 _ _))
-       // cond: (x.Op != OpOr16 && x.Op != OpConst16)
-       // result: (Or16 l x)
+       // match: (Or16 (Or16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 (Or16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 x (Or16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 x (Or16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
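
Aside (illustrative): the four reassociation variants above, (Or16 (Or16 i:(Const16 <t>) z) x) and its siblings, are one source rule expanded over the 2x2 argument orderings of its two commutative Or16 ops. A toy enumeration of those orderings, far simpler than the real rulegen.go:

package main

import "fmt"

func main() {
	// Two commutative Or16 ops in one source pattern give 2*2 generated
	// variants; this just prints the four argument orderings seen above.
	inner := [][2]string{{"i", "z"}, {"z", "i"}}
	for _, in := range inner {
		sub := fmt.Sprintf("(Or16 %s %s)", in[0], in[1])
		fmt.Printf("(Or16 %s x)\n", sub) // inner Or16 as the first argument
		fmt.Printf("(Or16 x %s)\n", sub) // inner Or16 as the second argument
	}
}
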
+       // match: (Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or16 (Const16 <t> [c]) (Or16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpOr16 && x.Op != OpConst16) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr16)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or16 (Or16 i:(Const16 <t>) z) x)
-       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
-       // result: (Or16 i (Or16 <t> z x))
+       // match: (Or16 (Or16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr16 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr16)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr16, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x))
+       // match: (Or16 (Or16 x (Const16 <t> [d])) (Const16 <t> [c]))
        // cond:
        // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpOr16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c | d))
@@ -11176,7 +15518,7 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
 func rewriteValuegeneric_OpOr32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or32  (Const32 [c])  (Const32 [d]))
+       // match: (Or32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c|d))])
        for {
@@ -11194,25 +15536,22 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AuxInt = int64(int32(c | d))
                return true
        }
-       // match: (Or32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Or32 (Const32 <t> [c]) x)
+       // match: (Or32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpOr32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c | d))
                return true
        }
        // match: (Or32 x x)
@@ -11245,6 +15584,23 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or32 (Const32 [-1]) _)
        // cond:
        // result: (Const32 [-1])
@@ -11260,6 +15616,21 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or32 _ (Const32 [-1]))
+       // cond:
+       // result: (Const32 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = -1
+               return true
+       }
        // match: (Or32 x (Or32 x y))
        // cond:
        // result: (Or32 x y)
@@ -11314,7 +15685,7 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or32 (Or32 x y) y)
+       // match: (Or32 (Or32 y x) x)
        // cond:
        // result: (Or32 x y)
        for {
@@ -11322,9 +15693,9 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                if v_0.Op != OpOr32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpOr32)
@@ -11332,72 +15703,223 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or32 x l:(Or32 _ _))
-       // cond: (x.Op != OpOr32 && x.Op != OpConst32)
-       // result: (Or32 l x)
+       // match: (Or32 (Or32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 (Or32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 x (Or32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 x (Or32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
        for {
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr32 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or32 (Const32 <t> [c]) (Or32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpOr32 && x.Op != OpConst32) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or32 (Or32 i:(Const32 <t>) z) x)
-       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Or32 i (Or32 <t> z x))
+       // match: (Or32 (Or32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr32 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst32 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr32)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr32, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x))
+       // match: (Or32 (Or32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpOr32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c | d))
@@ -11410,7 +15932,7 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
 func rewriteValuegeneric_OpOr64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or64  (Const64 [c])  (Const64 [d]))
+       // match: (Or64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c|d])
        for {
@@ -11428,25 +15950,22 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
-       // match: (Or64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Or64 (Const64 <t> [c]) x)
+       // match: (Or64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c|d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpOr64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c | d
                return true
        }
        // match: (Or64 x x)
@@ -11479,6 +15998,23 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or64 (Const64 [-1]) _)
        // cond:
        // result: (Const64 [-1])
@@ -11494,6 +16030,21 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or64 _ (Const64 [-1]))
+       // cond:
+       // result: (Const64 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = -1
+               return true
+       }
        // match: (Or64 x (Or64 x y))
        // cond:
        // result: (Or64 x y)
@@ -11548,90 +16099,241 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or64 (Or64 x y) y)
+       // match: (Or64 (Or64 y x) x)
+       // cond:
+       // result: (Or64 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or64 (Or64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 (Or64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 x (Or64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 x (Or64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Or64 (Const64 <t> [c|d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or64 (Const64 <t> [c]) (Or64 x (Const64 <t> [d])))
        // cond:
-       // result: (Or64 x y)
+       // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpOr64 {
+               if v_0.Op != OpConst64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
                        break
                }
-               v.reset(OpOr64)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (Or64 x l:(Or64 _ _))
-       // cond: (x.Op != OpOr64 && x.Op != OpConst64)
-       // result: (Or64 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr64 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpOr64 && x.Op != OpConst64) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr64)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or64 (Or64 i:(Const64 <t>) z) x)
-       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Or64 i (Or64 <t> z x))
+       // match: (Or64 (Or64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr64 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst64 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr64)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr64, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x))
+       // match: (Or64 (Or64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
        // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpOr64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr64 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c | d
@@ -11644,7 +16346,7 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
 func rewriteValuegeneric_OpOr8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or8   (Const8 [c])   (Const8 [d]))
+       // match: (Or8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c|d))])
        for {
@@ -11662,28 +16364,25 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AuxInt = int64(int8(c | d))
                return true
        }
-       // match: (Or8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Or8  (Const8  <t> [c]) x)
+       // match: (Or8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpOr8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c | d))
                return true
        }
-       // match: (Or8  x x)
+       // match: (Or8 x x)
        // cond:
        // result: x
        for {
@@ -11696,7 +16395,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  [0]) x)
+       // match: (Or8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -11713,7 +16412,24 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  [-1]) _)
+       // match: (Or8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or8 (Const8 [-1]) _)
        // cond:
        // result: (Const8  [-1])
        for {
@@ -11728,7 +16444,22 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (Or8  x (Or8  x y))
+       // match: (Or8 _ (Const8 [-1]))
+       // cond:
+       // result: (Const8  [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst8)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (Or8 x (Or8 x y))
        // cond:
        // result: (Or8  x y)
        for {
@@ -11746,7 +16477,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or8  x (Or8  y x))
+       // match: (Or8 x (Or8 y x))
        // cond:
        // result: (Or8  x y)
        for {
@@ -11764,7 +16495,25 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or8  (Or8  x y) x)
+       // match: (Or8 (Or8 x y) x)
+       // cond:
+       // result: (Or8  x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or8 (Or8 y x) x)
        // cond:
        // result: (Or8  x y)
        for {
@@ -11772,100 +16521,233 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                if v_0.Op != OpOr8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if x != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or8 (Or8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 (Or8 z i:(Const8 <t>)) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 x (Or8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 x (Or8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 (Const8 <t> [c]) (Or8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
                        break
                }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
                v.reset(OpOr8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (Or8  (Or8  x y) y)
+       // match: (Or8 (Const8 <t> [c]) (Or8 x (Const8 <t> [d])))
        // cond:
-       // result: (Or8  x y)
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpOr8 {
+               if v_0.Op != OpConst8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
                        break
                }
-               v.reset(OpOr8)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (Or8  x l:(Or8  _ _))
-       // cond: (x.Op != OpOr8  && x.Op != OpConst8)
-       // result: (Or8  l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr8 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpOr8 && x.Op != OpConst8) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr8)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Or8  i:(Const8  <t>) z) x)
-       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
-       // result: (Or8  i (Or8  <t> z x))
+       // match: (Or8 (Or8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr8 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst8 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpOr8)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr8, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  <t> [c]) (Or8  (Const8  <t> [d]) x))
+       // match: (Or8 (Or8 x (Const8 <t> [d])) (Const8 <t> [c]))
        // cond:
        // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpOr8 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c | d))
@@ -11876,7 +16758,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
        return false
 }
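(Aside, not part of this CL's diff: the paired match blocks above in rewriteValuegeneric_OpOr8 are what the rule generator now emits from a single commutative source rule, one block per argument order. The sketch below is a minimal, hypothetical illustration of that expansion step; the names pattern and commute are invented for illustration and are not the real rulegen.go code.)

	package main

	import "fmt"

	// pattern is a toy stand-in for a parsed rule pattern: an op name plus
	// its argument subpatterns.
	type pattern struct {
		op   string
		args []string
	}

	// commute returns the pattern itself plus, for a two-argument commutative
	// op, the variant with the arguments swapped; each variant would become
	// one generated match block like the ones above.
	func commute(p pattern, commutative map[string]bool) []pattern {
		out := []pattern{p}
		if commutative[p.op] && len(p.args) == 2 {
			out = append(out, pattern{p.op, []string{p.args[1], p.args[0]}})
		}
		return out
	}

	func main() {
		comm := map[string]bool{"Or8": true}
		p := pattern{"Or8", []string{"(Const8 [c])", "x"}}
		for _, variant := range commute(p, comm) {
			fmt.Printf("(%s %s %s)\n", variant.op, variant.args[0], variant.args[1])
		}
		// Prints the two matchers generated from one source rule:
		// (Or8 (Const8 [c]) x)
		// (Or8 x (Const8 [c]))
	}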
 func rewriteValuegeneric_OpPhi(v *Value) bool {
-       // match: (Phi (Const8  [c]) (Const8  [c]))
+       // match: (Phi (Const8 [c]) (Const8 [c]))
        // cond:
        // result: (Const8  [c])
        for {
@@ -12266,7 +17148,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16Ux64 (Lsh16x64 x (Const64  [8])) (Const64  [8]))
+       // match: (Rsh16Ux64 (Lsh16x64 x (Const64 [8])) (Const64 [8]))
        // cond:
        // result: (ZeroExt8to16  (Trunc16to8  <types.UInt8>  x))
        for {
@@ -12300,7 +17182,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x (Const8  [c]))
+       // match: (Rsh16Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12338,7 +17220,7 @@ func rewriteValuegeneric_OpRsh16Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x16  <t> x (Const16 [c]))
+       // match: (Rsh16x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -12356,7 +17238,7 @@ func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x16  (Const16 [0]) _)
+       // match: (Rsh16x16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12376,7 +17258,7 @@ func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x32  <t> x (Const32 [c]))
+       // match: (Rsh16x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -12394,7 +17276,7 @@ func rewriteValuegeneric_OpRsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x32  (Const16 [0]) _)
+       // match: (Rsh16x32 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12416,7 +17298,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  (Const16 [c]) (Const64 [d]))
+       // match: (Rsh16x64 (Const16 [c]) (Const64 [d]))
        // cond:
        // result: (Const16 [int64(int16(c) >> uint64(d))])
        for {
@@ -12434,7 +17316,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AuxInt = int64(int16(c) >> uint64(d))
                return true
        }
-       // match: (Rsh16x64  x (Const64 [0]))
+       // match: (Rsh16x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -12451,7 +17333,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh16x64  (Const16 [0]) _)
+       // match: (Rsh16x64 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12496,7 +17378,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x64 (Lsh16x64 x (Const64  [8])) (Const64  [8]))
+       // match: (Rsh16x64 (Lsh16x64 x (Const64 [8])) (Const64 [8]))
        // cond:
        // result: (SignExt8to16  (Trunc16to8  <types.Int8>  x))
        for {
@@ -12530,7 +17412,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8   <t> x (Const8  [c]))
+       // match: (Rsh16x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12548,7 +17430,7 @@ func rewriteValuegeneric_OpRsh16x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x8  (Const16 [0]) _)
+       // match: (Rsh16x8 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12843,7 +17725,7 @@ func rewriteValuegeneric_OpRsh32Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x (Const8  [c]))
+       // match: (Rsh32Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12881,7 +17763,7 @@ func rewriteValuegeneric_OpRsh32Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x16  <t> x (Const16 [c]))
+       // match: (Rsh32x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -12899,7 +17781,7 @@ func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x16  (Const32 [0]) _)
+       // match: (Rsh32x16 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -12919,7 +17801,7 @@ func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x32  <t> x (Const32 [c]))
+       // match: (Rsh32x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -12937,7 +17819,7 @@ func rewriteValuegeneric_OpRsh32x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x32  (Const32 [0]) _)
+       // match: (Rsh32x32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -12959,7 +17841,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  (Const32 [c]) (Const64 [d]))
+       // match: (Rsh32x64 (Const32 [c]) (Const64 [d]))
        // cond:
        // result: (Const32 [int64(int32(c) >> uint64(d))])
        for {
@@ -12977,7 +17859,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
                v.AuxInt = int64(int32(c) >> uint64(d))
                return true
        }
-       // match: (Rsh32x64  x (Const64 [0]))
+       // match: (Rsh32x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -12994,7 +17876,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x64  (Const32 [0]) _)
+       // match: (Rsh32x64 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -13102,7 +17984,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8   <t> x (Const8  [c]))
+       // match: (Rsh32x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13120,7 +18002,7 @@ func rewriteValuegeneric_OpRsh32x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x8  (Const32 [0]) _)
+       // match: (Rsh32x8 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -13444,7 +18326,7 @@ func rewriteValuegeneric_OpRsh64Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh64Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64Ux8  <t> x (Const8  [c]))
+       // match: (Rsh64Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13482,7 +18364,7 @@ func rewriteValuegeneric_OpRsh64Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x16  <t> x (Const16 [c]))
+       // match: (Rsh64x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -13500,7 +18382,7 @@ func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x16  (Const64 [0]) _)
+       // match: (Rsh64x16 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13520,7 +18402,7 @@ func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x32  <t> x (Const32 [c]))
+       // match: (Rsh64x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -13538,7 +18420,7 @@ func rewriteValuegeneric_OpRsh64x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x32  (Const64 [0]) _)
+       // match: (Rsh64x32 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13560,7 +18442,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x64  (Const64 [c]) (Const64 [d]))
+       // match: (Rsh64x64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c >> uint64(d)])
        for {
@@ -13578,7 +18460,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
                v.AuxInt = c >> uint64(d)
                return true
        }
-       // match: (Rsh64x64  x (Const64 [0]))
+       // match: (Rsh64x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -13595,7 +18477,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x64  (Const64 [0]) _)
+       // match: (Rsh64x64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13732,7 +18614,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x8   <t> x (Const8  [c]))
+       // match: (Rsh64x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13750,7 +18632,7 @@ func rewriteValuegeneric_OpRsh64x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x8  (Const64 [0]) _)
+       // match: (Rsh64x8 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13788,7 +18670,7 @@ func rewriteValuegeneric_OpRsh8Ux16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux16  (Const8 [0]) _)
+       // match: (Rsh8Ux16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13826,7 +18708,7 @@ func rewriteValuegeneric_OpRsh8Ux32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32  (Const8 [0]) _)
+       // match: (Rsh8Ux32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13848,7 +18730,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  (Const8  [c]) (Const64 [d]))
+       // match: (Rsh8Ux64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(uint8(c) >> uint64(d)))])
        for {
@@ -13866,7 +18748,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = int64(int8(uint8(c) >> uint64(d)))
                return true
        }
-       // match: (Rsh8Ux64  x (Const64 [0]))
+       // match: (Rsh8Ux64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -13883,7 +18765,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh8Ux64  (Const8 [0]) _)
+       // match: (Rsh8Ux64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13898,7 +18780,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  _ (Const64 [c]))
+       // match: (Rsh8Ux64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (Const8  [0])
        for {
@@ -13914,7 +18796,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  <t> (Rsh8Ux64  x (Const64 [c])) (Const64 [d]))
+       // match: (Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Rsh8Ux64  x (Const64 <t> [c+d]))
        for {
@@ -13987,7 +18869,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x (Const8  [c]))
+       // match: (Rsh8Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -14005,7 +18887,7 @@ func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux8  (Const8 [0]) _)
+       // match: (Rsh8Ux8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14025,7 +18907,7 @@ func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x16  <t> x (Const16 [c]))
+       // match: (Rsh8x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -14043,7 +18925,7 @@ func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x16   (Const8 [0]) _)
+       // match: (Rsh8x16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14063,7 +18945,7 @@ func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x32  <t> x (Const32 [c]))
+       // match: (Rsh8x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -14081,7 +18963,7 @@ func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32   (Const8 [0]) _)
+       // match: (Rsh8x32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14101,7 +18983,7 @@ func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x64   (Const8  [c]) (Const64 [d]))
+       // match: (Rsh8x64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(c) >> uint64(d))])
        for {
@@ -14119,7 +19001,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AuxInt = int64(int8(c) >> uint64(d))
                return true
        }
-       // match: (Rsh8x64   x (Const64 [0]))
+       // match: (Rsh8x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -14136,7 +19018,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh8x64   (Const8 [0]) _)
+       // match: (Rsh8x64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14151,7 +19033,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8x64  <t> (Rsh8x64  x (Const64 [c])) (Const64 [d]))
+       // match: (Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Rsh8x64  x (Const64 <t> [c+d]))
        for {
@@ -14186,7 +19068,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8   <t> x (Const8  [c]))
+       // match: (Rsh8x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -14204,7 +19086,7 @@ func rewriteValuegeneric_OpRsh8x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x8   (Const8 [0]) _)
+       // match: (Rsh8x8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14345,7 +19227,7 @@ func rewriteValuegeneric_OpSignExt32to64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  (Const8  [c]))
+       // match: (SignExt8to16 (Const8 [c]))
        // cond:
        // result: (Const16 [int64(  int8(c))])
        for {
@@ -14358,7 +19240,7 @@ func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to16  (Trunc16to8  x:(Rsh16x64 _ (Const64 [s]))))
+       // match: (SignExt8to16 (Trunc16to8 x:(Rsh16x64 _ (Const64 [s]))))
        // cond: s >= 8
        // result: x
        for {
@@ -14386,7 +19268,7 @@ func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  (Const8  [c]))
+       // match: (SignExt8to32 (Const8 [c]))
        // cond:
        // result: (Const32 [int64(  int8(c))])
        for {
@@ -14399,7 +19281,7 @@ func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to32  (Trunc32to8  x:(Rsh32x64 _ (Const64 [s]))))
+       // match: (SignExt8to32 (Trunc32to8 x:(Rsh32x64 _ (Const64 [s]))))
        // cond: s >= 24
        // result: x
        for {
@@ -14427,7 +19309,7 @@ func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  (Const8  [c]))
+       // match: (SignExt8to64 (Const8 [c]))
        // cond:
        // result: (Const64 [int64(  int8(c))])
        for {
@@ -14440,7 +19322,7 @@ func rewriteValuegeneric_OpSignExt8to64(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to64  (Trunc64to8  x:(Rsh64x64 _ (Const64 [s]))))
+       // match: (SignExt8to64 (Trunc64to8 x:(Rsh64x64 _ (Const64 [s]))))
        // cond: s >= 56
        // result: x
        for {
@@ -15285,7 +20167,7 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool {
 func rewriteValuegeneric_OpSub16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub16  (Const16 [c]) (Const16 [d]))
+       // match: (Sub16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c-d))])
        for {
@@ -15354,6 +20236,24 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub16 (Add16 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub16 (Add16 x y) y)
        // cond:
        // result: x
@@ -15372,6 +20272,24 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub16 (Add16 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub16 x (Sub16 i:(Const16 <t>) z))
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Sub16 (Add16 <t> x z) i)
@@ -15489,7 +20407,7 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
 func rewriteValuegeneric_OpSub32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub32  (Const32 [c]) (Const32 [d]))
+       // match: (Sub32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c-d))])
        for {
@@ -15558,6 +20476,24 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub32 (Add32 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub32 (Add32 x y) y)
        // cond:
        // result: x
@@ -15576,6 +20512,24 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub32 (Add32 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub32 x (Sub32 i:(Const32 <t>) z))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
        // result: (Sub32 (Add32 <t> x z) i)
@@ -15731,7 +20685,7 @@ func rewriteValuegeneric_OpSub32F(v *Value) bool {
 func rewriteValuegeneric_OpSub64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub64  (Const64 [c]) (Const64 [d]))
+       // match: (Sub64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c-d])
        for {
@@ -15800,6 +20754,24 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub64 (Add64 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub64 (Add64 x y) y)
        // cond:
        // result: x
@@ -15818,6 +20790,24 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub64 (Add64 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub64 x (Sub64 i:(Const64 <t>) z))
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
        // result: (Sub64 (Add64 <t> x z) i)
@@ -15973,7 +20963,7 @@ func rewriteValuegeneric_OpSub64F(v *Value) bool {
 func rewriteValuegeneric_OpSub8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub8   (Const8 [c]) (Const8 [d]))
+       // match: (Sub8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8 [int64(int8(c-d))])
        for {
@@ -15991,7 +20981,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AuxInt = int64(int8(c - d))
                return true
        }
-       // match: (Sub8  x (Const8  <t> [c]))
+       // match: (Sub8 x (Const8 <t> [c]))
        // cond: x.Op != OpConst8
        // result: (Add8  (Const8  <t> [int64(int8(-c))]) x)
        for {
@@ -16012,7 +21002,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  x x)
+       // match: (Sub8 x x)
        // cond:
        // result: (Const8  [0])
        for {
@@ -16024,7 +21014,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Sub8  (Add8  x y) x)
+       // match: (Sub8 (Add8 x y) x)
        // cond:
        // result: y
        for {
@@ -16042,7 +21032,25 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Sub8  (Add8  x y) y)
+       // match: (Sub8 (Add8 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Sub8 (Add8 x y) y)
        // cond:
        // result: x
        for {
@@ -16060,7 +21068,25 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  x (Sub8  i:(Const8  <t>) z))
+       // match: (Sub8 (Add8 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Sub8 x (Sub8 i:(Const8 <t>) z))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -16086,7 +21112,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Sub8  x (Sub8  z i:(Const8  <t>)))
+       // match: (Sub8 x (Sub8 z i:(Const8 <t>)))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Sub8  <t> x z))
        for {
@@ -16112,7 +21138,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Sub8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d])))
+       // match: (Sub8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d])))
        // cond:
        // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
@@ -16142,7 +21168,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x))
+       // match: (Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x))
        // cond:
        // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
        for {
@@ -16175,7 +21201,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
        return false
 }
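(Aside, not part of this CL's diff: the new (SubN (AddN y x) x) and (SubN (AddN y x) y) blocks above are the argument-swapped twins of the existing (SubN (AddN x y) ...) blocks, and all of them rely on fixed-width wrap-around making (y+x)-x == y unconditionally. A quick standalone check of that fact:)

	package main

	import "fmt"

	func main() {
		// y+x overflows int16 and wraps, yet subtracting x recovers y,
		// so the rewrite to y needs no extra condition.
		x, y := int16(30000), int16(12345)
		fmt.Println((y+x)-x == y) // true
	}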
 func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  (Const16 [c]))
+       // match: (Trunc16to8 (Const16 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16188,7 +21214,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc16to8  (ZeroExt8to16  x))
+       // match: (Trunc16to8 (ZeroExt8to16 x))
        // cond:
        // result: x
        for {
@@ -16202,7 +21228,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc16to8  (SignExt8to16  x))
+       // match: (Trunc16to8 (SignExt8to16 x))
        // cond:
        // result: x
        for {
@@ -16216,7 +21242,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc16to8  (And16 (Const16 [y]) x))
+       // match: (Trunc16to8 (And16 (Const16 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc16to8 x)
        for {
@@ -16237,6 +21263,27 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc16to8 (And16 x (Const16 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc16to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc16to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
@@ -16253,7 +21300,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (Trunc32to16 (ZeroExt8to32  x))
+       // match: (Trunc32to16 (ZeroExt8to32 x))
        // cond:
        // result: (ZeroExt8to16  x)
        for {
@@ -16280,7 +21327,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to16 (SignExt8to32  x))
+       // match: (Trunc32to16 (SignExt8to32 x))
        // cond:
        // result: (SignExt8to16  x)
        for {
@@ -16307,7 +21354,28 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to16 (And32 (Const32 [y]) x))
+       // match: (Trunc32to16 (And32 (Const32 [y]) x))
+       // cond: y&0xFFFF == 0xFFFF
+       // result: (Trunc32to16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               y := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(y&0xFFFF == 0xFFFF) {
+                       break
+               }
+               v.reset(OpTrunc32to16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Trunc32to16 (And32 x (Const32 [y])))
        // cond: y&0xFFFF == 0xFFFF
        // result: (Trunc32to16 x)
        for {
@@ -16315,12 +21383,12 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               y := v_0_0.AuxInt
-               x := v_0.Args[1]
+               y := v_0_1.AuxInt
                if !(y&0xFFFF == 0xFFFF) {
                        break
                }
@@ -16331,7 +21399,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  (Const32 [c]))
+       // match: (Trunc32to8 (Const32 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16344,7 +21412,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc32to8  (ZeroExt8to32  x))
+       // match: (Trunc32to8 (ZeroExt8to32 x))
        // cond:
        // result: x
        for {
@@ -16358,7 +21426,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to8  (SignExt8to32  x))
+       // match: (Trunc32to8 (SignExt8to32 x))
        // cond:
        // result: x
        for {
@@ -16372,7 +21440,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to8  (And32 (Const32 [y]) x))
+       // match: (Trunc32to8 (And32 (Const32 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc32to8 x)
        for {
@@ -16393,6 +21461,27 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc32to8 (And32 x (Const32 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc32to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc32to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
@@ -16409,7 +21498,7 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (Trunc64to16 (ZeroExt8to64  x))
+       // match: (Trunc64to16 (ZeroExt8to64 x))
        // cond:
        // result: (ZeroExt8to16  x)
        for {
@@ -16436,7 +21525,7 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to16 (SignExt8to64  x))
+       // match: (Trunc64to16 (SignExt8to64 x))
        // cond:
        // result: (SignExt8to16  x)
        for {
@@ -16484,6 +21573,27 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to16 (And64 x (Const64 [y])))
+       // cond: y&0xFFFF == 0xFFFF
+       // result: (Trunc64to16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFFFF == 0xFFFF) {
+                       break
+               }
+               v.reset(OpTrunc64to16)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
@@ -16500,7 +21610,7 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AuxInt = int64(int32(c))
                return true
        }
-       // match: (Trunc64to32 (ZeroExt8to64  x))
+       // match: (Trunc64to32 (ZeroExt8to64 x))
        // cond:
        // result: (ZeroExt8to32  x)
        for {
@@ -16540,7 +21650,7 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to32 (SignExt8to64  x))
+       // match: (Trunc64to32 (SignExt8to64 x))
        // cond:
        // result: (SignExt8to32  x)
        for {
@@ -16601,10 +21711,31 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to32 (And64 x (Const64 [y])))
+       // cond: y&0xFFFFFFFF == 0xFFFFFFFF
+       // result: (Trunc64to32 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFFFFFFFF == 0xFFFFFFFF) {
+                       break
+               }
+               v.reset(OpTrunc64to32)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  (Const64 [c]))
+       // match: (Trunc64to8 (Const64 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16617,7 +21748,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc64to8  (ZeroExt8to64  x))
+       // match: (Trunc64to8 (ZeroExt8to64 x))
        // cond:
        // result: x
        for {
@@ -16631,7 +21762,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to8  (SignExt8to64  x))
+       // match: (Trunc64to8 (SignExt8to64 x))
        // cond:
        // result: x
        for {
@@ -16645,7 +21776,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to8  (And64 (Const64 [y]) x))
+       // match: (Trunc64to8 (And64 (Const64 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc64to8 x)
        for {
@@ -16666,12 +21797,33 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to8 (And64 x (Const64 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc64to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc64to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
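(Aside, not part of this CL's diff: the added (TruncNtoM (AndN x (ConstN [y]))) blocks above are the swapped-argument counterparts of the existing constant-first blocks, and both orders rest on the same fact - masking with a constant whose low M bits are all ones cannot change the truncated result. A small standalone check for the 64-to-8 case, with an arbitrarily chosen mask:)

	package main

	import "fmt"

	func main() {
		x := int64(-123456789)
		y := int64(0x7FFFFFFFFFFFFF) // any mask with y&0xFF == 0xFF
		fmt.Println(int8(x&y) == int8(x)) // true: the low byte is untouched
	}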
 func rewriteValuegeneric_OpXor16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor16  (Const16 [c])  (Const16 [d]))
+       // match: (Xor16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c^d))])
        for {
@@ -16689,25 +21841,22 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AuxInt = int64(int16(c ^ d))
                return true
        }
-       // match: (Xor16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Xor16 (Const16 <t> [c]) x)
+       // match: (Xor16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpXor16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c ^ d))
                return true
        }
        // match: (Xor16 x x)
@@ -16739,6 +21888,23 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor16 x (Xor16 x y))
        // cond:
        // result: y
@@ -16793,90 +21959,241 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor16 (Xor16 x y) y)
+       // match: (Xor16 (Xor16 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor16 (Xor16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 (Xor16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 x (Xor16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 x (Xor16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor16 x l:(Xor16 _ _))
-       // cond: (x.Op != OpXor16 && x.Op != OpConst16)
-       // result: (Xor16 l x)
+       // match: (Xor16 (Const16 <t> [c]) (Xor16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpXor16 && x.Op != OpConst16) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor16)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor16 (Xor16 i:(Const16 <t>) z) x)
-       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
-       // result: (Xor16 i (Xor16 <t> z x))
+       // match: (Xor16 (Xor16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor16 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpXor16)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor16, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x))
+       // match: (Xor16 (Xor16 x (Const16 <t> [d])) (Const16 <t> [c]))
        // cond:
        // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpXor16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c ^ d))
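
The four (Xor16 (Xor16 ...) ...) reassociation blocks above all come from one source-level rule. Reading it back from the match/cond/result comments, that rule is (Xor16 (Xor16 i:(Const16 <t>) z) x) && z.Op != OpConst16 && x.Op != OpConst16 -> (Xor16 i (Xor16 <t> z x)); because both the outer and the inner Xor16 are marked commutative, the rule generator emits one match block per argument ordering. The short, self-contained Go sketch below is an illustration of that expansion only, not the rulegen.go implementation (the inner/sub names are invented here); it prints the four patterns in the same order they appear in rewriteValuegeneric_OpXor16:

package main

import "fmt"

func main() {
	// Both orderings of the inner, commutative Xor16.
	inner := []string{
		"(Xor16 i:(Const16 <t>) z)",
		"(Xor16 z i:(Const16 <t>))",
	}
	// The outer Xor16 is commutative too, so the inner expression may sit in
	// either argument slot: 2 x 2 = 4 generated match blocks for one rule.
	for _, sub := range inner {
		fmt.Printf("(Xor16 %s x)\n", sub)
	}
	for _, sub := range inner {
		fmt.Printf("(Xor16 x %s)\n", sub)
	}
}

The same 2 x 2 expansion accounts for the four constant-folding variants of (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x)) that follow, and for the parallel sets generated for Xor32, Xor64, and Xor8 below.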
@@ -16889,7 +22206,7 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
 func rewriteValuegeneric_OpXor32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor32  (Const32 [c])  (Const32 [d]))
+       // match: (Xor32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c^d))])
        for {
@@ -16907,25 +22224,22 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AuxInt = int64(int32(c ^ d))
                return true
        }
-       // match: (Xor32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Xor32 (Const32 <t> [c]) x)
+       // match: (Xor32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpXor32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c ^ d))
                return true
        }
        // match: (Xor32 x x)
@@ -16957,6 +22271,23 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor32 x (Xor32 x y))
        // cond:
        // result: y
@@ -17011,90 +22342,241 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor32 (Xor32 x y) y)
+       // match: (Xor32 (Xor32 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor32 (Xor32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 (Xor32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 x (Xor32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 x (Xor32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor32 (Const32 <t> [c]) (Xor32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (Xor32 x l:(Xor32 _ _))
-       // cond: (x.Op != OpXor32 && x.Op != OpConst32)
-       // result: (Xor32 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor32 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpXor32 && x.Op != OpConst32) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor32 (Xor32 i:(Const32 <t>) z) x)
-       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Xor32 i (Xor32 <t> z x))
+       // match: (Xor32 (Xor32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor32 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst32 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpXor32)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor32, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x))
+       // match: (Xor32 (Xor32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpXor32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c ^ d))
@@ -17107,7 +22589,7 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
 func rewriteValuegeneric_OpXor64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor64  (Const64 [c])  (Const64 [d]))
+       // match: (Xor64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c^d])
        for {
@@ -17125,25 +22607,22 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
-       // match: (Xor64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Xor64 (Const64 <t> [c]) x)
+       // match: (Xor64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c^d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpXor64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c ^ d
                return true
        }
        // match: (Xor64 x x)
@@ -17175,6 +22654,23 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor64 x (Xor64 x y))
        // cond:
        // result: y
@@ -17229,90 +22725,241 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor64 (Xor64 x y) y)
+       // match: (Xor64 (Xor64 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor64 (Xor64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 (Xor64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 x (Xor64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Xor64 x l:(Xor64 _ _))
-       // cond: (x.Op != OpXor64 && x.Op != OpConst64)
-       // result: (Xor64 l x)
+       // match: (Xor64 x (Xor64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
        for {
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor64 (Const64 <t> [c]) (Xor64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpXor64 && x.Op != OpConst64) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor64)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor64 (Xor64 i:(Const64 <t>) z) x)
-       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Xor64 i (Xor64 <t> z x))
+       // match: (Xor64 (Xor64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor64 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst64 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpXor64)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor64, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x))
+       // match: (Xor64 (Xor64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
        // result: (Xor64 (Const64 <t> [c^d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpXor64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor64 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c ^ d
@@ -17325,7 +22972,7 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
 func rewriteValuegeneric_OpXor8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor8   (Const8 [c])   (Const8 [d]))
+       // match: (Xor8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c^d))])
        for {
@@ -17343,28 +22990,25 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AuxInt = int64(int8(c ^ d))
                return true
        }
-       // match: (Xor8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Xor8  (Const8  <t> [c]) x)
+       // match: (Xor8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpXor8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c ^ d))
                return true
        }
-       // match: (Xor8  x x)
+       // match: (Xor8 x x)
        // cond:
        // result: (Const8  [0])
        for {
@@ -17376,7 +23020,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Xor8  (Const8  [0]) x)
+       // match: (Xor8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -17393,7 +23037,24 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Xor8  x (Xor8  x y))
+       // match: (Xor8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 x (Xor8 x y))
        // cond:
        // result: y
        for {
@@ -17411,7 +23072,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  x (Xor8  y x))
+       // match: (Xor8 x (Xor8 y x))
        // cond:
        // result: y
        for {
@@ -17429,7 +23090,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  (Xor8  x y) x)
+       // match: (Xor8 (Xor8 x y) x)
        // cond:
        // result: y
        for {
@@ -17447,42 +23108,51 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  (Xor8  x y) y)
+       // match: (Xor8 (Xor8 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (Xor8  x l:(Xor8  _ _))
-       // cond: (x.Op != OpXor8  && x.Op != OpConst8)
-       // result: (Xor8  l x)
+       // match: (Xor8 (Xor8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
                        break
                }
-               if !(x.Op != OpXor8 && x.Op != OpConst8) {
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
                v.reset(OpXor8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Xor8  (Xor8  i:(Const8  <t>) z) x)
+       // match: (Xor8 (Xor8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Xor8  i (Xor8  <t> z x))
        for {
@@ -17490,12 +23160,12 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                if v_0.Op != OpXor8 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst8 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
@@ -17508,7 +23178,59 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Xor8  (Const8  <t> [c]) (Xor8  (Const8  <t> [d]) x))
+       // match: (Xor8 x (Xor8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpXor8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor8 x (Xor8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpXor8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor8 (Const8 <t> [c]) (Xor8 (Const8 <t> [d]) x))
        // cond:
        // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
        for {
@@ -17538,6 +23260,96 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor8 (Const8 <t> [c]) (Xor8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 (Xor8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 (Xor8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpZero(v *Value) bool {
@@ -17700,7 +23512,7 @@ func rewriteValuegeneric_OpZeroExt32to64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  (Const8  [c]))
+       // match: (ZeroExt8to16 (Const8 [c]))
        // cond:
        // result: (Const16 [int64( uint8(c))])
        for {
@@ -17713,7 +23525,7 @@ func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to16  (Trunc16to8  x:(Rsh16Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to16 (Trunc16to8 x:(Rsh16Ux64 _ (Const64 [s]))))
        // cond: s >= 8
        // result: x
        for {
@@ -17741,7 +23553,7 @@ func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  (Const8  [c]))
+       // match: (ZeroExt8to32 (Const8 [c]))
        // cond:
        // result: (Const32 [int64( uint8(c))])
        for {
@@ -17754,7 +23566,7 @@ func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to32  (Trunc32to8  x:(Rsh32Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to32 (Trunc32to8 x:(Rsh32Ux64 _ (Const64 [s]))))
        // cond: s >= 24
        // result: x
        for {
@@ -17782,7 +23594,7 @@ func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  (Const8  [c]))
+       // match: (ZeroExt8to64 (Const8 [c]))
        // cond:
        // result: (Const64 [int64( uint8(c))])
        for {
@@ -17795,7 +23607,7 @@ func rewriteValuegeneric_OpZeroExt8to64(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to64  (Trunc64to8  x:(Rsh64Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to64 (Trunc64to8 x:(Rsh64Ux64 _ (Const64 [s]))))
        // cond: s >= 56
        // result: x
        for {